A de-identified dataset was provided for this study by the app Kindara, which is dedicated to providing menstruating individuals with a platform to digitally track their cycles and associated fertility signs.
knitr::include_graphics("../Figures Tables Media/Media/kindara_screen.PNG")knitr::opts_chunk$set(echo = TRUE, cache = TRUE)Transform CSV into feather files
# Convert every raw "Days" export into a feather file and, along the way,
# collect the ids of the users who logged at least one positive pregnancy test
# (preg_test == 1). The result, `users`, has one row per (user, source file).
input_folder <- paste0(IO$input_data, "Days/")
output_folder <- paste0(IO$tmp_data, "Days_feather_from_csv/")
if (!dir.exists(output_folder)) {
  dir.create(output_folder)
}
files <- list.files(input_folder)

tic()
cl <- makeCluster(par$n_cores, outfile = "")
registerDoParallel(cl)
users <- foreach(file = files, .combine = rbind, .packages = c("feather", "readr", "plyr", "dplyr")) %dopar% {
  # The exports are tab-separated; column types are pinned explicitly.
  days <- read_tsv(
    file = paste0(input_folder, file),
    col_types = cols(
      id = col_character(),
      date = col_date(format = "%Y-%m-%d %H:%M:%S"),
      first_day = col_logical(),
      conception = col_logical(),
      temperature = col_double(),
      temp_time = col_datetime(format = ""),
      temp_source = col_integer(),
      questionable_temp = col_logical(),
      no_fluid = col_logical(),
      fluid_sticky = col_integer(),
      fluid_creamy = col_integer(),
      fluid_eggwhite = col_integer(),
      fluid_watery = col_integer(),
      cervix_height = col_integer(),
      cervix_openness = col_integer(),
      cervix_firmness = col_integer(),
      opk = col_integer(),
      preg_test = col_integer(),
      ferning = col_skip(),
      prg_test = col_skip(),
      menstruation = col_integer(),
      spotting = col_logical(),
      sex = col_integer(),
      vaginal_sensation = col_skip(), #col_integer(),
      custom = col_character(),
      moods = col_character(),
      symptoms = col_character()
    )
  )

  # colnames
  colnames(days)[colnames(days) == "id"] <- "user_id"

  # identifying users with at least one positive pregnancy test in this file
  users_with_pos_preg_tests <- unique(days$user_id[which(days$preg_test == 1)])
  # rep() keeps the two columns the same length even when no user qualifies;
  # recycling a length-1 file name against a zero-length id vector would make
  # data.frame() fail.
  users <- data.frame(
    user_id = users_with_pos_preg_tests,
    kindara_csv_file = rep(file, length(users_with_pos_preg_tests))
  )

  # formatting pregnancy tests: keep the raw code in preg_test_o and recode
  # the value 2 as -1
  days <- mutate(days,
                 preg_test_o = preg_test,
                 preg_test = ifelse(preg_test == 2, -1, preg_test))

  new_file_name <- gsub("csv", "feather", file)
  write_feather(days, path = paste0(output_folder, new_file_name))

  users
}
# The cluster was created explicitly with makeCluster(), so it must be shut
# down with stopCluster(); stopImplicitCluster() would leave the workers alive.
stopCluster(cl)
toc()## 108.527 sec elapsed
write_feather(users, path = paste0(IO$tmp_data, "full_list_users_with_pos_preg_tests.feather"))Create a user table from the list of users that ever logged a positive pregnancy test
# Build the users table: one row per user who ever logged a positive
# pregnancy test, with the list of source files and a numeric batch id.
#users = read_feather(path = paste0(IO$tmp_data, "full_list_users_with_pos_preg_tests.feather"))

# The batch id is the integer code of the source file. as.factor() makes this
# work whether the column is already a factor (codes are kept as they are) or a
# character vector (where a bare as.numeric() would only produce NA).
users$batch <- as.numeric(as.factor(users$kindara_csv_file))

# A user found in several files gets the files concatenated and the smallest
# batch id.
users_agg <- ddply(users,
                   "user_id",
                   summarise,
                   kindara_csv_file = paste0(kindara_csv_file, collapse = ","),
                   batch = min(batch))
users <- users_agg
users$pos_preg_test <- TRUE
write_feather(users, path = paste0(IO$output_data, "users.feather"))
ok = file.copy(from = paste0(IO$output_data, "users.feather"), to = paste0(IO$tmp_data, "users_with_pos_preg_tests.feather"), overwrite = TRUE)Filter the days table and re-organize users into batches
# Keep only the days of users present in `users` and write one feather file per
# (batch, source file) pair into tmp_folder.
input_folder <- paste0(IO$tmp_data, "Days_feather_from_csv/")
tmp_folder <- paste0(IO$tmp_data, "Days_filtered_split_batches/")
if (!dir.exists(tmp_folder)) {
  dir.create(tmp_folder)
}
files <- list.files(input_folder)

cl <- makeCluster(par$n_cores)
registerDoParallel(cl)
ok <- foreach(file = files, .packages = "feather") %dopar% {
  full_days <- read_feather(path = paste0(input_folder, file))

  # filtering
  full_days <- full_days[full_days$user_id %in% users$user_id, ]
  full_days$input_file_id <- file

  # split by batches: only the batches that have users in this file
  for (b in unique(users$batch[users$user_id %in% full_days$user_id])) {
    days <- full_days[full_days$user_id %in% users$user_id[users$batch == b], ]
    days$batch <- b
    write_feather(days, path = paste0(tmp_folder, "batch_", b, "_", file))
  }
}
# Explicit clusters are released with stopCluster(); stopImplicitCluster() does
# not stop a cluster created with makeCluster().
stopCluster(cl)

# Input folder of the next step (merging the per-batch pieces).
input_folder <- paste0(IO$tmp_data,"Days_filtered_split_batches/")
# Merge the per-file pieces of each batch into a single days_<batch>.feather,
# drop fully duplicated rows, and record for every user which input files
# contributed rows (`input_files`).
output_folder <- paste0(IO$output_data, "Days/")
tmp_folder <- paste0(IO$tmp_data, "Days_filtered/")
# The output folder is reset only when the input folder exists.
if (dir.exists(input_folder)) {
  unlink(output_folder, recursive = TRUE)
  dir.create(output_folder)
}
if (!dir.exists(tmp_folder)) {
  dir.create(tmp_folder)
}
files <- list.files(input_folder)

# One cluster serves all batches. Creating a new one per batch without ever
# stopping it leaves n_cores idle workers behind for every batch.
cl <- makeCluster(par$n_cores)
registerDoParallel(cl)
input_files <- foreach(b = unique(users$batch), .combine = rbind) %do% {
  batch_files <- files[grep(paste0("batch_", b, "_day"), files)]
  days <- foreach(file = batch_files, .combine = rbind, .packages = "feather") %dopar% {
    read_feather(path = paste0(input_folder, file))
  }

  # checking for duplicated rows
  days <- days[!duplicated(days), ]

  write_feather(days, path = paste0(output_folder, "days_", b, ".feather"))
  file.copy(from = paste0(output_folder, "days_", b, ".feather"),
            to = paste0(tmp_folder, "days_", b, ".feather"),
            overwrite = TRUE)

  # one row per user: the sorted, unique input files joined with "|"
  aggregate(input_file_id ~ user_id, days, function(x) {
    paste0(unique(sort(x)), collapse = "|")
  })
}
stopCluster(cl)
save(input_files, file = paste0(IO$tmp_data, "input_files.Rdata"))input_folder = paste0(IO$tmp_data,"Days_filtered/")
# Flag cycle starts in every days file.
# For each file in input_folder (assigned just before this span), the rows are
# sorted by user and date, the days returned by find_cycle_starts() are marked
# with is_first_day = TRUE, and first_day_type is set to 0 on each user's
# earliest date and 1 on every cycle start. The result is written to
# output_folder and copied to tmp_folder.
output_folder = paste0(IO$output_data,"Days/")
tmp_folder = paste0(IO$tmp_data, "Days_filtered_with_cycles/")
# The output folder is wiped and recreated only when the input folder exists.
if(dir.exists(input_folder)){unlink(output_folder, recursive = TRUE);dir.create(output_folder)}
if(!dir.exists(tmp_folder)){dir.create(tmp_folder)}
files = list.files(input_folder)
# NOTE(review): the call that releases this cluster is not part of this span.
cl = makeCluster(par$n_cores)
registerDoParallel(cl)
foreach(file = files, .packages = c("feather","zoo","plyr","dplyr","tictoc","foreach")) %dopar%{
days = read_feather(path = paste0(input_folder,file))
# sort by user, then chronologically
o = order(days$user_id, days$date)
days = days[o,]
days$is_first_day = FALSE
days$first_day_type = NA
# day_id = "<user_id>_<date>" is the key used below to map results back to rows
days = mutate(days,
day_id = paste0(user_id, "_",date))
user_ids = unique(days$user_id)
tic()
# Sequential loop over the users of this file; collects the day_ids of all cycle starts.
day_ids = foreach(user_id = user_ids, .combine = c)%do%{
this_user_day = days[which((days$user_id == user_id)),]
# offset from the user's first logged date (assumes `date` is a Date, so the difference is in days -- TODO confirm)
this_user_day$day = as.numeric(this_user_day$date - min(this_user_day$date))
# find_cycle_starts() is defined elsewhere; its result is compared against `day` below,
# so it presumably returns values on that same scale -- verify against its definition.
cycle_starts = find_cycle_starts(this_user_day = this_user_day, debug = FALSE)
# Debugging helpers kept for manual inspection of a single user:
# cycle_starts
# this_user_day$day[this_user_day$first_day]
# plot.tracking.history(d = this_user_day, show_tests = TRUE)
# abline(v = this_user_day$date[this_user_day$first_day], col = "green")
# abline(v = this_user_day$date[match(cycle_starts,this_user_day$day)], lty = 3, col = "red")
day_ids = this_user_day$day_id[this_user_day$day %in% cycle_starts]
return(day_ids)
}
toc()
days$is_first_day[days$day_id %in% day_ids] = TRUE
# earliest logged date per user
agg = aggregate(date ~ user_id, days, min)
# Order matters: the earliest day gets 0 first, then every cycle start gets 1,
# so an earliest day that is also a cycle start ends up as 1.
days$first_day_type[days$day_id %in% paste0(agg$user_id,"_",agg$date)] = 0
days$first_day_type[days$is_first_day] = 1
write_feather(days, path = paste0(output_folder,file))
file.copy(from = paste0(output_folder,file), to = paste0(tmp_folder,file), overwrite = TRUE)
}## [[1]]
## [1] TRUE
##
## [[2]]
## [1] TRUE
##
## [[3]]
## [1] TRUE
##
## [[4]]
## [1] TRUE
##
## [[5]]
## [1] TRUE
##
## [[6]]
## [1] TRUE
##
## [[7]]
## [1] TRUE
##
## [[8]]
## [1] TRUE
##
## [[9]]
## [1] TRUE
##
## [[10]]
## [1] TRUE
##
## [[11]]
## [1] TRUE
##
## [[12]]
## [1] TRUE
##
## [[13]]
## [1] TRUE
##
## [[14]]
## [1] TRUE
##
## [[15]]
## [1] TRUE
##
## [[16]]
## [1] TRUE
##
## [[17]]
## [1] TRUE
##
## [[18]]
## [1] TRUE
##
## [[19]]
## [1] TRUE
##
## [[20]]
## [1] TRUE
##
## [[21]]
## [1] TRUE
##
## [[22]]
## [1] TRUE
##
## [[23]]
## [1] TRUE
##
## [[24]]
## [1] TRUE
##
## [[25]]
## [1] TRUE
##
## [[26]]
## [1] TRUE
##
## [[27]]
## [1] TRUE
##
## [[28]]
## [1] TRUE
##
## [[29]]
## [1] TRUE
##
## [[30]]
## [1] TRUE
##
## [[31]]
## [1] TRUE
##
## [[32]]
## [1] TRUE
##
## [[33]]
## [1] TRUE
##
## [[34]]
## [1] TRUE
##
## [[35]]
## [1] TRUE
##
## [[36]]
## [1] TRUE
##
## [[37]]
## [1] TRUE
##
## [[38]]
## [1] TRUE
##
## [[39]]
## [1] TRUE
##
## [[40]]
## [1] TRUE
##
## [[41]]
## [1] TRUE
##
## [[42]]
## [1] TRUE
##
## [[43]]
## [1] TRUE
##
## [[44]]
## [1] TRUE
##
## [[45]]
## [1] TRUE
##
## [[46]]
## [1] TRUE
##
## [[47]]
## [1] TRUE
##
## [[48]]
## [1] TRUE
##
## [[49]]
## [1] TRUE
##
## [[50]]
## [1] TRUE
##
## [[51]]
## [1] TRUE
##
## [[52]]
## [1] TRUE
##
## [[53]]
## [1] TRUE
##
## [[54]]
## [1] TRUE
##
## [[55]]
## [1] TRUE
##
## [[56]]
## [1] TRUE
##
## [[57]]
## [1] TRUE
##
## [[58]]
## [1] TRUE
##
## [[59]]
## [1] TRUE
##
## [[60]]
## [1] TRUE
##
## [[61]]
## [1] TRUE
##
## [[62]]
## [1] TRUE
##
## [[63]]
## [1] TRUE
##
## [[64]]
## [1] TRUE
stopImplicitCluster()We cannot use the cycles table that Kindara provided because we re-defined the cycles. We thus create the cycles from the days table by looking at which days have the flag is_first_day.
# Build the cycles table from the days files: every day with a non-missing
# first_day_type becomes one cycle row (user_id, start_date, first_day_type).
#users = read_feather(paste0(IO$output_data,"users.feather"))
days_input_folder <- paste0(IO$output_data, "Days/")
days_files <- list.files(days_input_folder)

cl <- makeCluster(par$n_cores)
registerDoParallel(cl)
cycles <- foreach(file = days_files, .combine = rbind, .packages = "feather") %dopar% {
  days <- read_feather(path = paste0(days_input_folder, file))

  # creating the cycles table
  cycles <- days[!is.na(days$first_day_type), c("user_id", "date", "first_day_type")]
  colnames(cycles)[which(colnames(cycles) == "date")] <- "start_date"
  cycles <- cycles[order(cycles$user_id, cycles$start_date), ]

  # keep only the users present in the users table
  cycles[cycles$user_id %in% users$user_id, ]
}
# Release the explicitly created workers; stopImplicitCluster() would not.
stopCluster(cl)
dim(cycles)## [1] 1744191 3
write_feather(cycles, path = paste0(IO$output_data,"cycles.feather"))
file.copy(from = paste0(IO$output_data,"cycles.feather"), to = paste0(IO$tmp_data,"cycles_first_version.feather"), overwrite = TRUE)## [1] TRUE
We create a unique cycle ID in the cycles table.
# cycles = read_feather(path = paste0(IO$output_data,"cycles.feather"))
# Number each user's cycles in chronological order and derive their end date
# and length from the start of the following cycle.
row_order <- order(cycles$user_id, cycles$start_date)
cycles <- cycles[row_order, ]

# cumulative sum of first_day_type within each user gives the cycle number
cycles$cycle_nb <- ave(cycles$first_day_type, cycles$user_id, FUN = cumsum)
cycles$cycle_id <- paste0(cycles$user_id, "_", cycles$cycle_nb)

# A row whose (cycle_nb - 1) id equals this row's cycle_id is the next cycle of
# the same user; a cycle ends the day before that one starts. Cycles without a
# successor get NA.
id_of_preceding_cycle <- paste0(cycles$user_id, "_", cycles$cycle_nb - 1)
next_cycle_row <- match(cycles$cycle_id, id_of_preceding_cycle)
cycles$end_date <- cycles$start_date[next_cycle_row] - 1
cycles$cycle_length <- as.numeric(cycles$end_date - cycles$start_date + 1)

write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(from = paste0(IO$output_data,"cycles.feather"), to = paste0(IO$tmp_data,"cycles_with_nb_and_id.feather"), overwrite = TRUE)## [1] TRUE
And associate each row of the days table with a cycle.
# Attach cycle information to every row of the days files.
# Each cycle is expanded into one row per cycle day, keyed by
# "<user_id>_<date>", and the days rows are matched against that key to receive
# cycle_nb, cycle_id, cycle_length, cycleday and cycleday_from_end.
# The files in days_folder are overwritten in place and then copied to days_tmp_folder.
days_folder = paste0(IO$output_data,"Days/")
days_tmp_folder = paste0(IO$tmp_data,"Days_with_cycle_id/")
if(!dir.exists(days_tmp_folder)){dir.create(days_tmp_folder)}
# NOTE(review): the call that releases this cluster is not part of this span.
cl = makeCluster(par$n_cores)
registerDoParallel(cl)
days_files = list.files(days_folder)
ok = foreach(file = days_files, .packages = "feather") %dopar%
{
days = read_feather(path = paste0(days_folder,file))
# the next two statements only inspect the table; their values are discarded
colnames(days)
dim(days)
# take the part of cycles that matches with the days users
j = which(cycles$user_id %in% unique(days$user_id)) #& (!is.na(cycles$cycle_length)))
cycles_sub = cycles[j,]
# for unfinished cycles, we will consider a time-window of 3 years = 1095 days after the start of the cycle to capture information about these on-going cycles.
cycles_sub$cycle_length[which(is.na(cycles_sub$cycle_length))] = 1095
# expand cycles for each day
# rep() is applied to every column with times = cycle_length, so each cycle row is repeated once per cycle day
cycles_sub_exp = as.data.frame(lapply(cycles_sub, rep, cycles_sub$cycle_length))
# running count within each cycle_id: 1, 2, ..., cycle_length
cycles_sub_exp$cycleday = ave(rep(1,nrow(cycles_sub_exp)), cycles_sub_exp$cycle_id, FUN =cumsum)
cycles_sub_exp$date = cycles_sub_exp$start_date + (cycles_sub_exp$cycleday - 1)
cycles_sub_exp$day_id = paste0(cycles_sub_exp$user_id, "_", cycles_sub_exp$date)
# match days and cycles_sub_exp
days$day_id = paste0(days$user_id, "_", days$date)
# match() returns the first matching expanded row; days not covered by any cycle get NA in all columns below
m = match(days$day_id, cycles_sub_exp$day_id)
days$cycle_nb = cycles_sub_exp$cycle_nb[m]
days$cycle_id = cycles_sub_exp$cycle_id[m]
# note: for unfinished cycles this is the 1095-day placeholder set above
days$cycle_length = cycles_sub_exp$cycle_length[m]
days$cycleday = cycles_sub_exp$cycleday[m]
# negative count from the end of the cycle: the last day (cycleday == cycle_length) is -1
days$cycleday_from_end = days$cycleday - days$cycle_length - 1
write_feather(days, path = paste0(days_folder, file))
file.copy(from = paste0(days_folder, file), to = paste0(days_tmp_folder, file), overwrite = TRUE)
}
stopImplicitCluster()Now we can aggregate the days table to report useful information on the cycles table
aggregate to create the cycles table
n_BBT –v
# Aggregate the days table per cycle: observation counts, pregnancy-test
# summaries, sex-log counts and number of temperature measurements. The result
# (`cycles_agg`) has one row per cycle_id.
input_days_folder <- paste0(IO$tmp_data, "Days_with_cycle_id/")
output_days_folder <- paste0(IO$output_data, "Days/")

cl <- makeCluster(par$n_cores)
registerDoParallel(cl)
days_files <- list.files(input_days_folder)

cycles_agg <- foreach(file = days_files, .combine = rbind, .packages = c('plyr','dplyr','feather')) %dopar% {
  days <- read_feather(path = paste0(input_days_folder, file))

  # The test counts use na.rm = TRUE like the other summaries: without it a
  # single missing preg_test turns the whole cycle's count into NA, which the
  # NA -> 0 reset below would then silently report as "no test".
  cycles_agg <- ddply(days,
                      .(cycle_id),
                      .parallel = FALSE,
                      .fun = summarize,
                      cycle_length = min(cycle_length),
                      n_days_obs = lu(date),
                      last_obs_day = max(cycleday),
                      n_pos_preg_test = sum(preg_test == 1, na.rm = TRUE),
                      n_neg_preg_test = sum(preg_test == -1, na.rm = TRUE),
                      # cycleday_from_end is negative, so the minimum is the earliest
                      # positive test; 0 means no positive test in the cycle
                      day_from_end_first_pos_preg_test = min(Inf, cycleday_from_end * (preg_test == 1), na.rm = TRUE),
                      day_last_pos_preg_test = max(-Inf, cycleday * (preg_test == 1), na.rm = TRUE),
                      day_last_preg_test = max(-Inf, cycleday * (preg_test %in% c(1,-1)), na.rm = TRUE),
                      n_tot_sex = sum(sex > 0, na.rm = TRUE),
                      n_prot_sex = sum(sex == 1, na.rm = TRUE),
                      n_unprot_sex = sum(sex == 2, na.rm = TRUE),
                      n_withdrawal = sum(sex == 3, na.rm = TRUE),
                      n_insemination = sum(sex == 4, na.rm = TRUE),
                      n_BBT = sum(!is.na(temperature), na.rm = TRUE))

  # convert the "from end" position of the first positive test into a cycle day
  cycles_agg$day_first_pos_preg_test <- NA
  j <- which(cycles_agg$day_from_end_first_pos_preg_test < 0)
  cycles_agg$day_first_pos_preg_test[j] <- cycles_agg$cycle_length[j] + cycles_agg$day_from_end_first_pos_preg_test[j] + 1

  cycles_agg$n_pos_preg_test[is.na(cycles_agg$n_pos_preg_test)] <- 0
  cycles_agg$n_neg_preg_test[is.na(cycles_agg$n_neg_preg_test)] <- 0
  cycles_agg$day_first_pos_preg_test[is.infinite(cycles_agg$day_first_pos_preg_test)] <- 0
  cycles_agg$day_last_pos_preg_test[is.infinite(cycles_agg$day_last_pos_preg_test)] <- 0

  # n_days_obs_after_first_pos_preg_test
  days$day_first_pos_preg_test <- cycles_agg$day_first_pos_preg_test[match(days$cycle_id, cycles_agg$cycle_id)]
  days$after_first_pos_preg_test <- (days$day_first_pos_preg_test > 0) & (days$cycleday > days$day_first_pos_preg_test)
  cycles_agg2 <- aggregate(after_first_pos_preg_test ~ cycle_id, days, sum, na.rm = TRUE)
  cycles_agg$n_days_obs_after_first_pos_preg_test <- cycles_agg2$after_first_pos_preg_test[match(cycles_agg$cycle_id, cycles_agg2$cycle_id)]

  # last_preg_test: result logged on the day of the last test of the cycle
  days$day_last_preg_test <- cycles_agg$day_last_preg_test[match(days$cycle_id, cycles_agg$cycle_id)]
  cycles_agg2 <- days[which(days$cycleday == days$day_last_preg_test), ]
  cycles_agg$last_preg_test <- cycles_agg2$preg_test[match(cycles_agg$cycle_id, cycles_agg2$cycle_id)]
  cycles_agg$last_preg_test[is.na(cycles_agg$last_preg_test)] <- 0

  # preg_test_class: any positive test -> "pregnant"; otherwise any negative
  # test -> "not pregnant"; otherwise "not tested"
  cycles_agg$preg_test_class <- ifelse(cycles_agg$n_pos_preg_test > 0,
                                       "pregnant",
                                       ifelse(cycles_agg$n_neg_preg_test > 0, "not pregnant", "not tested"))

  cycles_agg
}
# Release the explicitly created workers; stopImplicitCluster() would not.
stopCluster(cl)
write_feather(cycles_agg, path = paste0(IO$tmp_data, "cycles_agg.feather"))

# Copy every aggregated column except the key and cycle_length into `cycles`.
# setdiff() stays correct even if none of the excluded names is present, where
# x[-which(...)] would drop everything.
column_names <- setdiff(colnames(cycles_agg), c("cycle_id", "cycle_length"))
m <- match(cycles$cycle_id, cycles_agg$cycle_id)
for (column in column_names) {
  # [[ ]] addresses the column by name directly, no eval(parse()) needed
  cycles[[column]] <- cycles_agg[[column]][m]
}
write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(from = paste0(IO$output_data,"cycles.feather"), to = paste0(IO$tmp_data,"cycles_with_agg.feather"), overwrite = TRUE)## [1] TRUE
# preg_type for cycles classified as "pregnant":
#    1 : cycle_length is known
#    0 : cycle_length is missing
#   -1 : the cycle is cycle number 0 (applied last, so it overrides 1 and 0)
# All other cycles keep NA.
is_pregnant <- cycles$preg_test_class == "pregnant"
has_length <- !is.na(cycles$cycle_length)
cycles$preg_type <- NA
cycles$preg_type[which(is_pregnant & has_length)] <- 1
cycles$preg_type[which(is_pregnant & !has_length)] <- 0
cycles$preg_type[which(is_pregnant & (cycles$cycle_nb == 0))] <- -1
#load(paste0(IO$tmp_data,"users_with_original_file_id.Rdata"),verbose = TRUE)
# Per-user summaries derived from the cycles table, then merged into `users`.
# suppressWarnings() hides the min()/max() warnings raised for users without
# any positive cycle (empty input gives Inf / -Inf, handled right below).
users_agg <- suppressWarnings(
  ddply(cycles,
        .(user_id),
        .fun = summarize,
        n_cycles = max(cycle_nb, na.rm = TRUE),
        n_days_obs = sum(n_days_obs, na.rm = TRUE),
        n_pos_cycles = sum(n_pos_preg_test > 0, na.rm = TRUE),
        first_cycle_preg = min(cycle_nb[n_pos_preg_test > 0], na.rm = TRUE),
        last_cycle_preg = max(cycle_nb[n_pos_preg_test > 0], na.rm = TRUE)
  )
)
# Sentinels for users without a positive cycle: with first = 0 and last = Inf,
# no cycle is "before the first" or "after the last" pregnancy.
users_agg$first_cycle_preg[is.infinite(users_agg$first_cycle_preg)] <- 0
users_agg$last_cycle_preg[is.infinite(users_agg$last_cycle_preg)] <- Inf

# n_obs_after_last_preg
cycles_tmp <- cycles
cycles_tmp$first_cycle_preg <- users_agg$first_cycle_preg[match(cycles_tmp$user_id, users_agg$user_id)]
cycles_tmp$last_cycle_preg <- users_agg$last_cycle_preg[match(cycles_tmp$user_id, users_agg$user_id)]
users_agg2 <- aggregate(n_days_obs ~ user_id,
                        cycles_tmp[cycles_tmp$cycle_nb > cycles_tmp$last_cycle_preg, ],
                        sum, na.rm = TRUE)
users_agg$n_days_obs_after_last_preg <- users_agg2$n_days_obs[match(users_agg$user_id, users_agg2$user_id)]
# Users with no cycle after their last pregnancy get 0. The NA test has to look
# at the column being filled; testing a column that does not exist yields an
# empty index and leaves every NA in place.
users_agg$n_days_obs_after_last_preg[is.na(users_agg$n_days_obs_after_last_preg)] <- 0
users_agg$n_cycles_after_last_preg <- users_agg$n_cycles - users_agg$last_cycle_preg
users_agg$n_cycles_after_last_preg[is.infinite(users_agg$n_cycles_after_last_preg)] <- 0

# minimal cycle length before the first positive preg test
users_agg2 <- aggregate(cycle_length ~ user_id,
                        cycles_tmp[cycles_tmp$cycle_nb < cycles_tmp$first_cycle_preg, ],
                        min, na.rm = TRUE)
users_agg$shortest_cycle_before_first_pos_preg <- users_agg2$cycle_length[match(users_agg$user_id, users_agg2$user_id)]

# adding new columns to the users table
# setdiff() is safe when there is no overlap (x[-which(...)] would return
# nothing in that case); [[ ]] replaces the eval(parse()) assignment.
column_names <- setdiff(colnames(users_agg), colnames(users))
m <- match(users$user_id, users_agg$user_id)
for (column in column_names) {
  users[[column]] <- users_agg[[column]][m]
}
write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(from = paste0(IO$output_data,"users.feather"), to = paste0(IO$tmp_data,"users_with_agg.feather"), overwrite = TRUE)## [1] TRUE
aggregate
avg, median and sd of cycle_length (cycles before first positive pregnancy tests)
#load(paste0(IO$tmp_data,"users_with_original_file_id.Rdata"),verbose = TRUE)
# Mean, median and standard deviation of cycle_length per user, computed
#  - over cycles without a positive pregnancy test (cycle_length_no_preg_*), and
#  - over cycles preceding the first positive one (cycle_length_before_preg_*).
cycles_tmp <- cycles_tmp[cycles_tmp$user_id %in% users$user_id, ]
users_agg <- suppressWarnings(
  ddply(cycles_tmp,
        .(user_id),
        .fun = summarize,
        cycle_length_no_preg_avg = mean(cycle_length[n_pos_preg_test == 0], na.rm = TRUE),
        cycle_length_no_preg_median = median(cycle_length[n_pos_preg_test == 0], na.rm = TRUE),
        cycle_length_no_preg_sd = sd(cycle_length[n_pos_preg_test == 0], na.rm = TRUE),
        cycle_length_before_preg_avg = mean(cycle_length[cycle_nb < first_cycle_preg], na.rm = TRUE),
        cycle_length_before_preg_median = median(cycle_length[cycle_nb < first_cycle_preg], na.rm = TRUE),
        cycle_length_before_preg_sd = sd(cycle_length[cycle_nb < first_cycle_preg], na.rm = TRUE))
)

# Merge all statistics into the users table; columns are addressed by name
# with [[ ]] rather than through eval(parse()).
column_names <- setdiff(colnames(users_agg), "user_id")
m <- match(users$user_id, users_agg$user_id)
for (column in column_names) {
  users[[column]] <- users_agg[[column]][m]
}
write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(from = paste0(IO$output_data,"users.feather"), to = paste0(IO$tmp_data,"users_with_cycle_length_stats.feather"), overwrite = TRUE)## [1] TRUE
# Per-user number of positive pregnancy tests and first / last logged date,
# computed from the days files.
input_days_folder <- paste0(IO$output_data, "Days/")
cl <- makeCluster(par$n_cores)
registerDoParallel(cl)
days_files <- list.files(input_days_folder)
users_agg <- foreach(file = days_files, .combine = rbind, .packages = c('plyr','dplyr','feather')) %dopar% {
  days <- read_feather(path = paste0(input_days_folder, file))
  # .parallel = FALSE: this code already runs inside a worker, where no
  # parallel backend is registered, so asking plyr for parallelism only
  # triggers a fallback warning.
  ddply(days,
        .(user_id),
        .parallel = FALSE,
        .fun = summarize,
        n_pos_preg_tests = sum(preg_test == 1),
        earliest_date = min(date, na.rm = TRUE),
        latest_date = max(date, na.rm = TRUE))
}
# Release the explicitly created workers; stopImplicitCluster() would not.
stopCluster(cl)
write_feather(users_agg, path = paste0(IO$tmp_data, "users_agg_earliest_and_latest_dates.feather"))

# Add the columns that `users` does not have yet. setdiff() stays correct when
# there is no overlap, and [[ ]] replaces the eval(parse()) assignment.
column_names <- setdiff(colnames(users_agg), colnames(users))
m <- match(users$user_id, users_agg$user_id)
for (column in column_names) {
  users[[column]] <- users_agg[[column]][m]
}

# time span between the first and the last logged day
users$app_usage_duration_in_days <- as.numeric(users$latest_date - users$earliest_date)
users$app_usage_duration_in_years <- users$app_usage_duration_in_days / 365
write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(from = paste0(IO$output_data,"users.feather"), to = paste0(IO$tmp_data,"users_with_earliest_and_latest_date.feather"), overwrite = TRUE)## [1] TRUE
accounts = read_tsv(paste0(IO$input_data,"accounts.csv"))## Parsed with column specification:
## cols(
## id = col_character(),
## objective = col_character(),
## birth_day = col_datetime(format = ""),
## average_cycle = col_double(),
## average_luteal = col_double(),
## average_period = col_double(),
## avr_follicular_temp = col_logical(),
## avr_luteal_temp = col_logical()
## )
# Age of the users today, taken from the birth year in the accounts table.
# Ages outside 15-55 are set to NA.
accounts$birth_year <- year(accounts$birth_day)
account_row <- match(users$user_id, accounts$id)
users$birth_year <- accounts$birth_year[account_row]
users$age_now <- year(today()) - users$birth_year
users$age_now[(users$age_now < 15) | (users$age_now > 55)] <- NA
write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(from = paste0(IO$output_data, "users.feather"),
          to = paste0(IO$tmp_data, "users_with_age.feather"),
          overwrite = TRUE) ## [1] TRUE

# Age at the start of each cycle (same 15-55 filter), and the age in the cycle
# whose id is "<user_id>_<first_cycle_preg>".
user_row <- match(cycles$user_id, users$user_id)
cycles$birth_year <- users$birth_year[user_row]
cycles$current_age <- year(cycles$start_date) - cycles$birth_year
cycles$current_age[(cycles$current_age < 15) | (cycles$current_age > 55)] <- NA
first_preg_cycle_id <- paste0(users$user_id, "_", users$first_cycle_preg)
users$age_at_first_pregnancy <- cycles$current_age[match(first_preg_cycle_id, cycles$cycle_id)]

write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(from = paste0(IO$output_data, "users.feather"),
          to = paste0(IO$tmp_data, "users_with_age_at_first_pregnancy.feather"),
          overwrite = TRUE) ## [1] TRUE
write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(from = paste0(IO$output_data, "cycles.feather"),
          to = paste0(IO$tmp_data, "cycles_with_age.feather"),
          overwrite = TRUE) ## [1] TRUE
accounts = read_tsv(paste0(IO$input_data,"accounts.csv"))## Parsed with column specification:
## cols(
## id = col_character(),
## objective = col_character(),
## birth_day = col_datetime(format = ""),
## average_cycle = col_double(),
## average_luteal = col_double(),
## average_period = col_double(),
## avr_follicular_temp = col_logical(),
## avr_luteal_temp = col_logical()
## )
# Reproductive objective declared in the app, copied from accounts to users
# and from users to cycles.
account_row <- match(users$user_id, accounts$id)
users$reprod_obj_app <- accounts$objective[account_row]
user_row <- match(cycles$user_id, users$user_id)
cycles$reprod_obj_app <- users$reprod_obj_app[user_row]
write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(from = paste0(IO$output_data, "users.feather"),
          to = paste0(IO$tmp_data, "users_with_reprod_obj_app.feather"),
          overwrite = TRUE) ## [1] TRUE
write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(from = paste0(IO$output_data, "cycles.feather"),
          to = paste0(IO$tmp_data, "cycles_with_reprod_obj_app.feather"),
          overwrite = TRUE) ## [1] TRUE

# Pregnancies table: one row per cycle classified as "pregnant"; the cycle id
# and length are renamed to pregnancy id and duration.
pregnant_rows <- which(cycles$preg_test_class == "pregnant")
pregnancies <- select(cycles[pregnant_rows, ],
                      user_id, cycle_id, cycle_nb, cycle_length, preg_type, current_age, reprod_obj_app)
renamed <- match(c("cycle_id", "cycle_length"), colnames(pregnancies))
colnames(pregnancies)[renamed] <- c("pregnancy_id", "preg_duration")
# no duration is reported for pregnancies logged in cycle number 0
pregnancies$preg_duration[which(pregnancies$cycle_nb == 0)] <- NA
write_feather(pregnancies, path = paste0(IO$output_data, "pregnancies.feather"))
file.copy(from = paste0(IO$output_data, "pregnancies.feather"),
          to = paste0(IO$tmp_data, "pregnancies_1st_version.feather"),
          overwrite = TRUE) ## [1] TRUE

# user_type = highest preg_type among the user's pregnancies
users_agg <- aggregate(preg_type ~ user_id, pregnancies, max)
users$user_type <- users_agg$preg_type[match(users$user_id, users_agg$user_id)]
write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(from = paste0(IO$output_data, "users.feather"),
          to = paste0(IO$tmp_data, "users_with_user_type.feather"),
          overwrite = TRUE) ## [1] TRUE
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)users = read_feather(path = paste0(IO$output_data, "users.feather"))
cycles = read_feather( path = paste0(IO$output_data, "cycles.feather"))
pregnancies = read_feather( path = paste0(IO$output_data, "pregnancies.feather"))cycles$preg_outcome_based_on_duration = factor(cycles$preg_test_class,
levels = c(dict$pregnancy_timeline$abbreviation,
unique(cycles$preg_test_class)))
# Classify the outcome of every "pregnant" cycle.
# Step 1: bin the cycle length with the breaks and labels of dict$pregnancy_timeline.
# Step 2: refine the labels into preg_outcome_cat, using the length of the following cycle for "TB".
# Step 3: map the refined abbreviations to the categories of dict$pregnancy_outcomes.
j = which(cycles$preg_test_class == "pregnant")
# cut() yields NA when cycle_length is NA or falls outside the breaks
cycles$preg_outcome_based_on_duration[j] = cut(cycles$cycle_length[j],
breaks = c(0,dict$pregnancy_timeline$duration_in_days),
labels = as.character(dict$pregnancy_timeline$abbreviation))
# length of the same user's next cycle (NA when there is none)
cycles$cycle_length_next_cycle = cycles$cycle_length[match(paste0(cycles$user_id, "_",cycles$cycle_nb+1),cycles$cycle_id)]
cycles$preg_outcome_cat = as.character(cycles$preg_outcome_based_on_duration)
cycles$preg_outcome_cat[cycles$preg_outcome_based_on_duration == "TB noBF"] = "TB noBF (II)"
cycles$preg_outcome_cat[cycles$preg_outcome_based_on_duration == "BF"] = "TB BF (II)"
# The three statements below all test for the label "TB" and each replaces it for a
# disjoint set of rows: next cycle of at most 7*9 = 63 days, longer than 63 days, or no next cycle.
cycles$preg_outcome_cat[which((cycles$preg_outcome_cat == "TB") & (cycles$cycle_length_next_cycle <= 7*9))] = "TB noBF (I)"
cycles$preg_outcome_cat[which((cycles$preg_outcome_cat == "TB") & (cycles$cycle_length_next_cycle > 7*9))] = "TB BF (I)"
cycles$preg_outcome_cat[which((cycles$preg_outcome_cat == "TB") & (is.na(cycles$cycle_length_next_cycle)))] = "TB (III)"
# abbreviation -> category lookup; abbreviations absent from the dictionary give NA
cycles$preg_outcome = dict$pregnancy_outcomes$categories[match(cycles$preg_outcome_cat, dict$pregnancy_outcomes$abbreviation)]
cycles$preg_outcome_cat = factor(cycles$preg_outcome_cat, levels = unique(dict$pregnancy_outcomes$abbreviation))
cycles$preg_outcome = factor(cycles$preg_outcome, levels = unique(dict$pregnancy_outcomes$categories))
# counts per refined category, restricted to the "pregnant" cycles
counts = table(cycles$preg_outcome_cat[j])
table(cycles$preg_outcome_cat[j])/sum(cycles$preg_outcome_cat %in% c("EPL","LPL","ExPTB","PTB","TB noBF (I)","TB noBF (II)","TB (III)","TB BF (I)","TB BF (II)"))##
## FP-VEPL EPL LPL ExPTB PTB
## 0.23621122 0.29455264 0.14251393 0.02087274 0.02312188
## TB noBF (I) TB noBF (II) TB (III) TB BF (I) TB BF (II)
## 0.02952853 0.13264837 0.02019118 0.07490330 0.26166743
## unclear
## 0.24907564
# Copy the three outcome columns from cycles to the matching pregnancies
# (pregnancy_id is the id of the cycle the pregnancy was logged in).
m <- match(pregnancies$pregnancy_id, cycles$cycle_id)
pregnancies$preg_outcome_based_on_duration <- cycles$preg_outcome_based_on_duration[m]
pregnancies$preg_outcome_cat <- cycles$preg_outcome_cat[m]
pregnancies$preg_outcome <- cycles$preg_outcome[m]

write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
file.copy(from = paste0(IO$output_data, "cycles.feather"),
          to = paste0(IO$tmp_data, "cycles_with_preg_outcome.feather"),
          overwrite = TRUE) ## [1] TRUE
write_feather(pregnancies, path = paste0(IO$output_data, "pregnancies.feather"))
file.copy(from = paste0(IO$output_data, "pregnancies.feather"),
          to = paste0(IO$tmp_data, "pregnancies_with_preg_outcome.feather"),
          overwrite = TRUE) ## [1] TRUE

# Per user: number of pregnant cycles, of cycles with outcome EPL or LPL
# (n_PL), and of cycles with outcome ExPTB, PTB, TB noBF or BF (n_LB).
users_preg_outcome <- ddply(
  cycles,
  .(user_id),
  .fun = summarize,
  n_preg = sum(preg_test_class == "pregnant", na.rm = TRUE),
  n_PL = sum(preg_outcome %in% c("EPL","LPL"), na.rm = TRUE),
  n_LB = sum(preg_outcome %in% c("ExPTB","PTB","TB noBF","BF"), na.rm = TRUE)
)
table(users_preg_outcome$n_LB, users_preg_outcome$n_PL)##
## 0 1 2 3 4 5 6 8
## 0 89300 15169 1569 251 46 10 1 1
## 1 21259 3671 627 108 22 3 0 0
## 2 2620 621 127 16 1 1 0 0
## 3 139 28 8 2 0 0 0 0
## 4 9 1 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 1
table(users_preg_outcome$n_LB)##
## 0 1 2 3 4 5
## 106347 25690 3386 177 10 1
table(users_preg_outcome$n_PL)##
## 0 1 2 3 4 5 6 8
## 113327 19490 2331 377 69 14 1 2
j = which((users_preg_outcome$n_LB + users_preg_outcome$n_PL)>0)
table(users_preg_outcome$n_LB[j], users_preg_outcome$n_PL[j])##
## 0 1 2 3 4 5 6 8
## 0 0 15169 1569 251 46 10 1 1
## 1 21259 3671 627 108 22 3 0 0
## 2 2620 621 127 16 1 1 0 0
## 3 139 28 8 2 0 0 0 0
## 4 9 1 0 0 0 0 0 0
## 5 0 0 0 0 0 0 0 1
table(users_preg_outcome$n_LB[j])##
## 0 1 2 3 4 5
## 17047 25690 3386 177 10 1
round(table(users_preg_outcome$n_LB[j])/sum(table(users_preg_outcome$n_LB[j])) * 100, 2)##
## 0 1 2 3 4 5
## 36.81 55.47 7.31 0.38 0.02 0.00
table(users_preg_outcome$n_PL[j])##
## 0 1 2 3 4 5 6 8
## 24027 19490 2331 377 69 14 1 2
round(table(users_preg_outcome$n_PL[j])/sum(table(users_preg_outcome$n_PL[j])) * 100, 2)##
## 0 1 2 3 4 5 6 8
## 51.88 42.09 5.03 0.81 0.15 0.03 0.00 0.00
dim(users)## [1] 135611 28
dim(users_preg_outcome)## [1] 135611 4
# Merge the per-user outcome counts (every column after user_id) into `users`.
# The column list gets its own name instead of shadowing base::colnames, and
# columns are assigned with [[ ]] rather than eval(parse()).
outcome_columns <- colnames(users_preg_outcome)[-1]
m <- match(users$user_id, users_preg_outcome$user_id)
for (outcome_column in outcome_columns) {
  users[[outcome_column]] <- users_preg_outcome[[outcome_column]][m]
}
write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(from = paste0(IO$output_data, "users.feather"),
          to = paste0(IO$tmp_data, "users_with_preg_outcome.feather"),
          overwrite = TRUE) ## [1] TRUE
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)users = read_feather(path = paste0(IO$output_data, "users.feather"))
cycles = read_feather(path = paste0(IO$output_data, "cycles.feather"))
pregnancies = read_feather(path = paste0(IO$output_data, "pregnancies.feather"))cycles$cycle_nb_from_next_preg = NA
# Label the cycles surrounding each pregnancy: cycle_nb_from_next_preg is -3,
# -2 or -1 for the three cycles preceding a pregnant cycle and 0 for the
# pregnant cycle itself; pregnancy_id holds the id of that pregnant cycle.
# The assignments run from the farthest to the closest cycle, so when a cycle
# qualifies more than once the closest pregnancy wins.
j_preg <- which(cycles$preg_test_class == "pregnant")
cycles$pregnancy_id <- NA

# Row of the cycle located `lag` cycles before each pregnant cycle (NA if absent).
row_before_preg <- function(lag) {
  match(paste0(cycles$user_id[j_preg], "_", cycles$cycle_nb[j_preg] - lag), cycles$cycle_id)
}

j3 <- row_before_preg(3)
cycles$cycle_nb_from_next_preg[j3] <- -3
cycles$pregnancy_id[j3[!is.na(j3)]] <- cycles$cycle_id[j_preg[!is.na(j3)]]

j2 <- row_before_preg(2)
cycles$cycle_nb_from_next_preg[j2] <- -2
cycles$pregnancy_id[j2[!is.na(j2)]] <- cycles$cycle_id[j_preg[!is.na(j2)]]

j1 <- row_before_preg(1)
cycles$cycle_nb_from_next_preg[j1] <- -1
cycles$pregnancy_id[j1[!is.na(j1)]] <- cycles$cycle_id[j_preg[!is.na(j1)]]

j0 <- j_preg
cycles$cycle_nb_from_next_preg[j0] <- 0
cycles$pregnancy_id[j0] = cycles$cycle_id[j_preg] ggplot(pregnancies, aes(x = preg_duration, fill = reprod_obj_app) )+
geom_histogram(position = "identity",binwidth = 7, alpha = 1) +
facet_grid(reprod_obj_app ~ . , scale = "free")+
xlim(0,1000)## Warning: Removed 93929 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing missing values (geom_bar).
ggplot(pregnancies, aes(x = preg_outcome, fill = reprod_obj_app) )+
geom_bar() +
facet_grid(reprod_obj_app ~ . , scale = "free")t1 = table( pregnancies$reprod_obj_app, pregnancies$preg_outcome)
round(100* t1/apply(t1, 1, sum))##
## FP-VEPL EPL LPL ExPTB PTB TB noBF BF unclear
## avoid_preg 12 15 5 1 2 21 31 12
## get_preg 23 25 12 2 2 8 15 13
## other 23 23 8 2 1 15 20 8
## preg 14 19 10 1 1 11 24 20
## track_period 13 17 7 1 2 19 28 13
It seems a little counter-intuitive as users who declared wanting to avoid pregnancy have the highest proportion of live-birth and the lowest proportion of pregnancy losses.
Maybe, it is because these labels are not reliable.
ggplot(cycles[cycles$n_insemination > 0,], aes(n_insemination, fill = reprod_obj_app))+
geom_histogram(position = "identity",binwidth = 1) + xlim(0,20) +
facet_grid(reprod_obj_app ~ ., scale = "free")## Warning: Removed 216 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing missing values (geom_bar).
table(cycles$reprod_obj_app, cycles$n_insemination > 0)##
## FALSE TRUE
## avoid_preg 150691 10748
## get_preg 382690 10443
## other 2844 69
## preg 964279 27698
## track_period 187625 7049
table(cycles$reprod_obj_app, cycles$n_insemination > 0)/apply(table(cycles$reprod_obj_app, cycles$n_insemination > 0)
, 1, sum)##
## FALSE TRUE
## avoid_preg 0.93342377 0.06657623
## get_preg 0.97343647 0.02656353
## other 0.97631308 0.02368692
## preg 0.97207798 0.02792202
## track_period 0.96379075 0.03620925
# Number of logged protected sexual intercourse per cycle (cycles with at
# least one), one panel per declared reproductive objective; x limited to 0-20.
ggplot(cycles[cycles$n_prot_sex > 0, ], aes(n_prot_sex, fill = reprod_obj_app)) +
  geom_histogram(position = "identity", binwidth = 1) +
  xlim(0, 20) +
  facet_grid(reprod_obj_app ~ ., scales = "free")## Warning: Removed 560 rows containing non-finite values (stat_bin).
## Warning: Removed 10 rows containing missing values (geom_bar).
Many of the users who declared wanting to avoid pregnancy are actually logging inseminations; similarly, many of those who declared wanting to achieve pregnancy log protected sexual intercourse.
It may thus be relevant to attempt to infer the reproductive objectives from the sexual behavior of the users.
However, sexual intercourse is not logged in many cycles, including cycles in which positive pregnancy tests are logged.
# Fraction of cycles with (FALSE) and without (TRUE) any logged intercourse.
prop.table(table(cycles$n_tot_sex == 0))##
## FALSE TRUE
## 0.5339027 0.4660973
# Same split, broken down by pregnancy-test class: counts first ...
t2 = table(cycles$preg_test_class, cycles$n_tot_sex == 0)
t2##
## FALSE TRUE
## not pregnant 126983 12639
## not tested 661325 766263
## pregnant 142891 34035
# ... then row proportions (each test class sums to 1).
prop.table(t2, margin = 1)##
## FALSE TRUE
## not pregnant 0.90947702 0.09052298
## not tested 0.46324640 0.53675360
## pregnant 0.80763144 0.19236856
We can nonetheless attempt to estimate the reproductive objectives of the users who do log sexual intercourse.
We look at the overlap between their unprotected sexual intercourse and their fertile window in the 3 cycles preceding a pregnancy.
We can define the fertile window by counting backward from the end of each cycle, assuming an identical luteal phase for each cycle and each user.
# For every per-batch days file: keep the days of the three cycles preceding a
# pregnancy, attach a counting-based fertility value looked up by
# cycleday_from_end, centre it per cycle, and write the result as a feather
# file of the same name in tmp_folder.
input_days_folder = paste0(IO$output_data,"Days/")
tmp_folder = paste0(IO$tmp_data, "Days_with_fertility/")
if(!dir.exists(tmp_folder)){dir.create(tmp_folder)}
days_files = list.files(input_days_folder)

# Loop-invariant inputs, computed once instead of once per file; the parallel
# body then no longer references `cycles` or `dict` directly.
pre_preg_cycle_ids = cycles$cycle_id[cycles$cycle_nb_from_next_preg %in% -3:-1]
fertility_lookup = dict$fertility_counting

cl = makeCluster(par$n_cores)
registerDoParallel(cl)
# Each iteration returns only the file name. Returning the value of
# write_feather() would ship every tibble back to the master and print them all.
written_files = foreach(file = days_files, .combine = c, .packages = c('plyr','dplyr','feather')) %dopar%
{
  days = read_feather(path = paste0(input_days_folder, file))
  days = days[days$cycle_id %in% pre_preg_cycle_ids, ]
  # Fertility value by position counted from the end of the cycle; cycle days
  # absent from the lookup table get 0.
  days$fertility_counting = fertility_lookup$fertility[
    match(days$cycleday_from_end, fertility_lookup$cycleday_from_end)]
  days$fertility_counting[is.na(days$fertility_counting)] = 0
  # Centre the fertility value on the mean of the logged days of each cycle.
  cycles_agg = aggregate(fertility_counting ~ cycle_id, days, mean, na.rm = TRUE)
  days_mean_fertility = cycles_agg$fertility_counting[match(days$cycle_id, cycles_agg$cycle_id)]
  days$fertility_counting_n = days$fertility_counting - days_mean_fertility
  write_feather(days, path = paste0(tmp_folder, file))
  file
}
# Release the worker processes.
stopCluster(cl)
# NOTE: the listing below appears to have been rendered when each iteration
# still returned the tibble it had just written.
## [[1]]
## # A tibble: 105,724 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 001614… 2016-02-03 TRUE FALSE NA NA
## 2 001614… 2016-02-04 FALSE FALSE NA NA
## 3 001614… 2016-02-05 FALSE FALSE NA NA
## 4 001614… 2016-02-06 FALSE FALSE NA NA
## 5 001614… 2016-02-09 FALSE FALSE NA NA
## 6 001614… 2016-02-11 FALSE FALSE NA NA
## 7 001614… 2016-02-12 FALSE FALSE NA NA
## 8 001614… 2016-02-24 FALSE FALSE NA NA
## 9 001614… 2016-02-25 FALSE FALSE NA NA
## 10 001614… 2016-02-26 FALSE FALSE NA NA
## # … with 105,714 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[2]]
## # A tibble: 115,409 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 001c54… 2016-04-05 TRUE FALSE NA NA
## 2 001c54… 2016-04-06 FALSE FALSE NA NA
## 3 001c54… 2016-04-07 FALSE FALSE NA NA
## 4 001c54… 2016-04-08 FALSE FALSE NA NA
## 5 001c54… 2016-04-09 FALSE FALSE NA NA
## 6 001c54… 2016-04-18 FALSE FALSE NA NA
## 7 001c54… 2016-04-21 FALSE FALSE NA NA
## 8 001c54… 2016-04-24 FALSE FALSE NA NA
## 9 001d8f… 2016-09-08 TRUE FALSE NA NA
## 10 001d8f… 2016-09-09 FALSE FALSE NA NA
## # … with 115,399 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[3]]
## # A tibble: 38,581 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0060b4… 2018-07-24 TRUE FALSE NA NA
## 2 0060b4… 2018-08-01 FALSE FALSE NA NA
## 3 0060b4… 2018-08-20 TRUE FALSE NA NA
## 4 0060b4… 2018-08-21 FALSE FALSE NA NA
## 5 0060b4… 2018-09-15 TRUE FALSE NA NA
## 6 0060b4… 2018-09-16 FALSE FALSE NA NA
## 7 0060b4… 2018-10-01 FALSE FALSE NA NA
## 8 0060b4… 2018-10-11 FALSE FALSE NA NA
## 9 007b72… 2015-11-12 TRUE FALSE NA NA
## 10 007b72… 2015-12-05 TRUE FALSE NA NA
## # … with 38,571 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[4]]
## # A tibble: 109,479 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0019de… 2015-07-05 TRUE FALSE NA NA
## 2 0019de… 2015-07-06 FALSE FALSE NA NA
## 3 0019de… 2015-07-07 FALSE FALSE NA NA
## 4 0019de… 2015-07-08 FALSE FALSE NA NA
## 5 0019de… 2015-07-09 FALSE FALSE NA NA
## 6 0019de… 2015-07-31 TRUE FALSE NA NA
## 7 0019de… 2015-08-01 FALSE FALSE NA NA
## 8 0019de… 2015-08-02 FALSE FALSE NA NA
## 9 0019de… 2015-08-03 FALSE FALSE NA NA
## 10 0019de… 2015-08-04 FALSE FALSE NA NA
## # … with 109,469 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[5]]
## # A tibble: 95,992 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 001944… 2017-04-10 TRUE FALSE NA NA
## 2 001944… 2017-04-11 FALSE FALSE NA NA
## 3 001944… 2017-04-12 FALSE FALSE NA NA
## 4 001944… 2017-04-13 FALSE FALSE NA NA
## 5 001944… 2017-04-14 FALSE FALSE NA NA
## 6 001944… 2017-04-15 FALSE FALSE NA NA
## 7 001944… 2017-04-18 FALSE FALSE NA NA
## 8 001944… 2017-04-22 FALSE FALSE NA NA
## 9 001944… 2017-04-23 FALSE FALSE NA NA
## 10 001944… 2017-04-29 FALSE FALSE NA NA
## # … with 95,982 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[6]]
## # A tibble: 115,974 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000c7f… 2015-07-20 TRUE FALSE NA NA
## 2 000c7f… 2015-07-21 FALSE FALSE NA NA
## 3 000c7f… 2015-07-22 FALSE FALSE NA NA
## 4 000c7f… 2015-08-06 FALSE FALSE NA NA
## 5 000c7f… 2015-08-11 FALSE FALSE NA NA
## 6 000c7f… 2015-08-20 FALSE FALSE NA NA
## 7 000c7f… 2015-08-21 TRUE FALSE NA NA
## 8 000c7f… 2015-08-25 FALSE FALSE NA NA
## 9 000c7f… 2015-08-31 FALSE FALSE NA NA
## 10 000c7f… 2015-09-05 FALSE FALSE NA NA
## # … with 115,964 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[7]]
## # A tibble: 69,633 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00080f… 2016-09-09 TRUE FALSE NA NA
## 2 00080f… 2016-09-10 FALSE FALSE NA NA
## 3 00080f… 2016-09-11 FALSE FALSE NA NA
## 4 00080f… 2016-09-12 FALSE FALSE NA NA
## 5 00080f… 2016-09-13 FALSE FALSE NA NA
## 6 00080f… 2016-10-11 TRUE FALSE NA NA
## 7 00080f… 2016-10-12 FALSE FALSE NA NA
## 8 00080f… 2016-10-13 FALSE FALSE NA NA
## 9 00080f… 2016-10-14 FALSE FALSE NA NA
## 10 00080f… 2016-10-15 FALSE FALSE NA NA
## # … with 69,623 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[8]]
## # A tibble: 94,886 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00600b… 2016-12-26 TRUE FALSE NA NA
## 2 00600b… 2017-01-22 TRUE TRUE NA NA
## 3 00600b… 2017-01-23 FALSE FALSE NA NA
## 4 00600b… 2017-01-24 FALSE FALSE NA NA
## 5 00600b… 2017-01-25 FALSE FALSE NA NA
## 6 00600b… 2017-01-26 FALSE FALSE NA NA
## 7 00600b… 2017-01-27 FALSE FALSE NA NA
## 8 00600b… 2017-01-28 FALSE FALSE NA NA
## 9 00600b… 2017-01-29 FALSE FALSE NA NA
## 10 00600b… 2017-01-30 FALSE FALSE NA NA
## # … with 94,876 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[9]]
## # A tibble: 124,580 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 001693… 2017-05-14 TRUE FALSE NA NA
## 2 001693… 2017-05-15 FALSE FALSE NA NA
## 3 001693… 2017-05-16 FALSE FALSE NA NA
## 4 001693… 2017-05-17 FALSE FALSE NA NA
## 5 001693… 2017-05-18 FALSE FALSE NA NA
## 6 001693… 2017-05-19 FALSE FALSE NA NA
## 7 001693… 2017-06-03 FALSE FALSE NA NA
## 8 001693… 2017-06-04 FALSE FALSE NA NA
## 9 001693… 2017-06-05 FALSE FALSE NA NA
## 10 001693… 2017-06-06 FALSE FALSE NA NA
## # … with 124,570 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[10]]
## # A tibble: 32,383 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 007be6… 2014-12-16 TRUE FALSE NA NA
## 2 007be6… 2014-12-17 FALSE FALSE NA NA
## 3 007be6… 2014-12-18 FALSE FALSE NA NA
## 4 007be6… 2014-12-19 FALSE FALSE NA NA
## 5 00852d… 2019-05-08 TRUE FALSE NA NA
## 6 00852d… 2019-05-09 FALSE FALSE NA NA
## 7 00852d… 2019-05-10 FALSE FALSE NA NA
## 8 00852d… 2019-05-16 FALSE FALSE NA NA
## 9 00852d… 2019-05-19 FALSE FALSE NA NA
## 10 00852d… 2019-05-20 FALSE FALSE NA NA
## # … with 32,373 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[11]]
## # A tibble: 79,257 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0040d1… 2016-08-29 TRUE FALSE NA NA
## 2 0040d1… 2016-08-30 FALSE FALSE NA NA
## 3 0040d1… 2016-09-10 FALSE FALSE NA NA
## 4 0040d1… 2016-09-11 FALSE FALSE NA NA
## 5 0040d1… 2016-09-12 FALSE FALSE NA NA
## 6 0040d1… 2016-09-25 TRUE FALSE NA NA
## 7 0040d1… 2016-09-26 FALSE FALSE 97.2 1970-01-01 06:00:00
## 8 0040d1… 2016-09-27 FALSE FALSE 96.8 1970-01-01 06:00:00
## 9 0040d1… 2016-09-28 FALSE FALSE 96.9 1970-01-01 06:00:00
## 10 0040d1… 2016-09-29 FALSE FALSE 97.3 1970-01-01 06:00:00
## # … with 79,247 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[12]]
## # A tibble: 103,150 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 008bac… 2017-01-19 TRUE FALSE NA NA
## 2 008bac… 2017-01-20 FALSE FALSE NA NA
## 3 008bac… 2017-01-21 FALSE FALSE NA NA
## 4 008bac… 2017-02-12 FALSE FALSE NA NA
## 5 008bac… 2017-02-13 FALSE FALSE NA NA
## 6 008bac… 2017-02-14 FALSE FALSE NA NA
## 7 008bac… 2017-02-19 TRUE FALSE NA NA
## 8 008bac… 2017-02-20 FALSE FALSE NA NA
## 9 008bac… 2017-02-21 FALSE FALSE NA NA
## 10 008bac… 2017-02-22 FALSE FALSE NA NA
## # … with 103,140 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[13]]
## # A tibble: 91,549 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000169… 2017-07-26 TRUE FALSE NA NA
## 2 000169… 2017-07-27 FALSE FALSE NA NA
## 3 000169… 2017-07-28 FALSE FALSE NA NA
## 4 000169… 2017-07-29 FALSE FALSE NA NA
## 5 000169… 2017-07-30 FALSE FALSE NA NA
## 6 000169… 2017-08-21 TRUE FALSE NA NA
## 7 000169… 2017-08-22 FALSE FALSE NA NA
## 8 000169… 2017-08-23 FALSE FALSE NA NA
## 9 000169… 2017-08-24 FALSE FALSE NA NA
## 10 000169… 2017-08-25 FALSE FALSE NA NA
## # … with 91,539 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[14]]
## # A tibble: 35,525 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 006fa7… 2015-02-12 TRUE FALSE 97.7 NA
## 2 006fa7… 2015-02-13 FALSE FALSE 97.9 NA
## 3 006fa7… 2015-02-14 FALSE FALSE NA NA
## 4 006fa7… 2015-02-15 FALSE FALSE 97.9 NA
## 5 006fa7… 2015-02-16 FALSE FALSE NA NA
## 6 006fa7… 2015-02-17 FALSE FALSE 97.1 NA
## 7 006fa7… 2015-02-18 FALSE FALSE 97.8 NA
## 8 006fa7… 2015-02-19 FALSE FALSE 97.4 NA
## 9 006fa7… 2015-02-20 FALSE FALSE 96.9 NA
## 10 006fa7… 2015-02-21 FALSE FALSE 97.6 NA
## # … with 35,515 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[15]]
## # A tibble: 88,893 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000a01… 2017-03-04 TRUE FALSE NA NA
## 2 000a01… 2017-03-06 FALSE FALSE NA NA
## 3 000a01… 2017-04-01 TRUE FALSE NA NA
## 4 000a01… 2017-04-02 FALSE FALSE NA NA
## 5 000a01… 2017-04-03 FALSE FALSE NA NA
## 6 000a01… 2017-04-08 FALSE FALSE NA NA
## 7 000a01… 2017-04-10 FALSE FALSE NA NA
## 8 000a01… 2017-04-11 FALSE FALSE NA NA
## 9 000a01… 2017-04-12 FALSE FALSE NA NA
## 10 000a01… 2017-04-13 FALSE FALSE NA NA
## # … with 88,883 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[16]]
## # A tibble: 116,110 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000c4c… 2013-06-04 TRUE FALSE NA NA
## 2 000c4c… 2013-06-05 FALSE FALSE NA NA
## 3 000c4c… 2013-06-06 FALSE FALSE NA NA
## 4 000c4c… 2013-06-07 FALSE FALSE NA NA
## 5 000c4c… 2013-06-08 FALSE FALSE NA NA
## 6 000c4c… 2013-06-09 FALSE FALSE NA NA
## 7 000c4c… 2013-07-04 TRUE FALSE NA NA
## 8 000c4c… 2013-07-05 FALSE FALSE NA NA
## 9 000c4c… 2013-07-06 FALSE FALSE NA NA
## 10 000c4c… 2013-08-03 FALSE FALSE NA NA
## # … with 116,100 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[17]]
## # A tibble: 79,250 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 006ded… 2014-08-15 TRUE FALSE NA NA
## 2 006ded… 2014-08-16 FALSE FALSE NA NA
## 3 006ded… 2014-08-17 FALSE FALSE NA NA
## 4 006ded… 2014-08-18 FALSE FALSE NA NA
## 5 006ded… 2014-08-19 FALSE FALSE NA NA
## 6 006ded… 2014-08-20 FALSE FALSE 97.0 NA
## 7 006ded… 2014-08-24 FALSE FALSE NA NA
## 8 006ded… 2014-08-25 FALSE FALSE 97.1 NA
## 9 006ded… 2014-08-27 FALSE FALSE NA NA
## 10 006ded… 2014-08-30 FALSE FALSE NA NA
## # … with 79,240 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[18]]
## # A tibble: 108,255 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0005ec… 2016-10-28 TRUE FALSE 98 1970-01-01 07:46:00
## 2 0005ec… 2016-10-29 FALSE FALSE NA NA
## 3 0005ec… 2016-10-30 FALSE FALSE NA NA
## 4 0005ec… 2016-10-31 FALSE FALSE NA NA
## 5 0005ec… 2016-11-01 FALSE FALSE NA NA
## 6 0005ec… 2016-11-02 FALSE FALSE NA NA
## 7 0005ec… 2016-11-03 FALSE FALSE 97.5 1970-01-01 21:41:00
## 8 0005ec… 2016-11-04 FALSE FALSE NA NA
## 9 0005ec… 2016-11-05 FALSE FALSE NA NA
## 10 0005ec… 2016-11-06 FALSE FALSE 97.9 1970-01-01 07:09:00
## # … with 108,245 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[19]]
## # A tibble: 114,430 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0013ca… 2014-12-04 FALSE FALSE NA NA
## 2 0013ca… 2014-12-05 TRUE FALSE NA NA
## 3 0013ca… 2014-12-06 FALSE FALSE NA NA
## 4 0013ca… 2014-12-07 FALSE FALSE NA NA
## 5 0013ca… 2014-12-08 FALSE FALSE NA NA
## 6 0013ca… 2014-12-09 FALSE FALSE NA NA
## 7 0013ca… 2015-01-01 TRUE FALSE NA NA
## 8 0013ca… 2015-01-02 FALSE FALSE NA NA
## 9 0013ca… 2015-01-03 FALSE FALSE NA NA
## 10 0013ca… 2015-01-04 FALSE FALSE NA NA
## # … with 114,420 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[20]]
## # A tibble: 57,124 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 002a79… 2017-10-12 TRUE FALSE NA NA
## 2 002a79… 2017-10-13 FALSE FALSE NA NA
## 3 002a79… 2017-10-14 FALSE FALSE NA NA
## 4 002a79… 2017-10-15 FALSE FALSE NA NA
## 5 002a79… 2017-11-10 FALSE FALSE NA NA
## 6 002a79… 2017-11-11 FALSE FALSE NA NA
## 7 002a79… 2017-11-12 FALSE FALSE NA NA
## 8 002a79… 2017-11-13 TRUE FALSE NA NA
## 9 002a79… 2017-11-14 FALSE FALSE NA NA
## 10 002a79… 2017-11-15 FALSE FALSE NA NA
## # … with 57,114 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[21]]
## # A tibble: 97,286 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00247c… 2015-11-20 TRUE FALSE NA NA
## 2 00247c… 2015-11-21 FALSE FALSE NA NA
## 3 00247c… 2015-11-22 FALSE FALSE NA NA
## 4 00247c… 2015-11-23 FALSE FALSE NA NA
## 5 00247c… 2015-11-24 FALSE FALSE NA NA
## 6 00247c… 2015-11-25 FALSE FALSE NA NA
## 7 00247c… 2015-12-17 TRUE FALSE NA NA
## 8 00247c… 2015-12-18 FALSE FALSE NA NA
## 9 00247c… 2015-12-19 FALSE FALSE NA NA
## 10 00247c… 2015-12-20 FALSE FALSE NA NA
## # … with 97,276 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[22]]
## # A tibble: 99,770 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 002b08… 2016-05-16 TRUE FALSE NA NA
## 2 002b08… 2016-05-17 FALSE FALSE NA NA
## 3 002b08… 2016-05-18 FALSE FALSE NA NA
## 4 002b08… 2016-05-19 FALSE FALSE NA NA
## 5 002b08… 2016-05-20 FALSE FALSE NA NA
## 6 002b08… 2016-05-21 FALSE FALSE NA NA
## 7 002b08… 2016-05-22 FALSE FALSE NA NA
## 8 002b08… 2016-05-23 FALSE FALSE NA NA
## 9 002b08… 2016-05-24 FALSE FALSE NA NA
## 10 002b08… 2016-05-25 FALSE FALSE NA NA
## # … with 99,760 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[23]]
## # A tibble: 53,453 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 009cc5… 2016-08-16 TRUE FALSE NA NA
## 2 009cc5… 2016-08-17 FALSE FALSE NA NA
## 3 009cc5… 2016-08-18 FALSE FALSE NA NA
## 4 009cc5… 2016-09-15 TRUE FALSE NA NA
## 5 009cc5… 2016-09-16 FALSE FALSE NA NA
## 6 009cc5… 2016-09-17 FALSE FALSE NA NA
## 7 009cc5… 2016-09-24 FALSE FALSE NA NA
## 8 009cc5… 2016-10-08 TRUE TRUE NA NA
## 9 009cc5… 2016-10-09 FALSE FALSE NA NA
## 10 009cc5… 2016-10-10 FALSE FALSE NA NA
## # … with 53,443 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[24]]
## # A tibble: 60,992 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 008145… 2017-08-20 TRUE FALSE NA NA
## 2 008145… 2017-08-21 FALSE FALSE NA NA
## 3 008145… 2017-08-22 FALSE FALSE NA NA
## 4 008145… 2017-08-23 FALSE FALSE NA NA
## 5 008145… 2017-08-24 FALSE FALSE NA NA
## 6 008145… 2017-08-25 FALSE FALSE NA NA
## 7 008145… 2017-08-26 FALSE FALSE NA NA
## 8 008145… 2017-08-28 FALSE FALSE NA NA
## 9 008145… 2017-09-02 FALSE FALSE NA NA
## 10 008145… 2017-09-03 FALSE FALSE NA NA
## # … with 60,982 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[25]]
## # A tibble: 107,842 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 001d3d… 2015-04-29 TRUE FALSE NA NA
## 2 001d3d… 2015-04-30 FALSE FALSE NA NA
## 3 001d3d… 2015-05-01 FALSE FALSE NA NA
## 4 001d3d… 2015-05-02 FALSE FALSE NA NA
## 5 001d3d… 2015-05-03 FALSE FALSE NA NA
## 6 001d3d… 2015-05-25 FALSE FALSE 95 NA
## 7 001d3d… 2015-05-29 TRUE FALSE NA NA
## 8 001d3d… 2015-05-30 FALSE FALSE NA NA
## 9 001d3d… 2015-05-31 FALSE FALSE NA NA
## 10 001d3d… 2015-06-01 FALSE FALSE NA NA
## # … with 107,832 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[26]]
## # A tibble: 96,350 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000729… 2017-05-21 TRUE FALSE 97.7 1970-01-01 21:56:00
## 2 000729… 2017-05-22 FALSE FALSE 97.1 1970-01-01 21:47:00
## 3 000729… 2017-05-23 FALSE FALSE 97.0 1970-01-01 21:44:00
## 4 000729… 2017-05-24 FALSE FALSE 97.3 1970-01-01 21:54:00
## 5 000729… 2017-05-25 FALSE FALSE 97.3 1970-01-01 23:55:00
## 6 000729… 2017-05-26 FALSE FALSE 96.8 1970-01-01 22:44:00
## 7 000729… 2017-05-27 FALSE FALSE 96.8 1970-01-01 22:44:00
## 8 000729… 2017-05-28 FALSE FALSE 96.4 1970-01-01 23:36:00
## 9 000729… 2017-05-29 FALSE FALSE 97.3 1970-01-01 21:46:00
## 10 000729… 2017-05-30 FALSE FALSE 96.9 1970-01-01 21:02:00
## # … with 96,340 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[27]]
## # A tibble: 122,162 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0008c8… 2015-08-01 TRUE FALSE NA NA
## 2 000cd8… 2014-07-14 FALSE FALSE NA NA
## 3 000cd8… 2014-07-31 FALSE FALSE NA NA
## 4 000cd8… 2014-08-01 FALSE FALSE NA NA
## 5 000cd8… 2014-08-09 FALSE FALSE NA NA
## 6 000cd8… 2014-08-10 FALSE FALSE NA NA
## 7 000cd8… 2014-08-11 FALSE FALSE NA NA
## 8 000cd8… 2014-11-24 FALSE FALSE 97.9 NA
## 9 000cd8… 2014-11-25 FALSE FALSE 98.1 NA
## 10 000cd8… 2014-11-26 TRUE FALSE 97.6 NA
## # … with 122,152 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[28]]
## # A tibble: 118,587 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0008a9… 2015-09-28 TRUE FALSE NA NA
## 2 0008a9… 2015-09-29 FALSE FALSE NA NA
## 3 0008a9… 2015-09-30 FALSE FALSE NA NA
## 4 0008a9… 2015-10-01 FALSE FALSE NA NA
## 5 0008a9… 2015-10-02 FALSE FALSE NA NA
## 6 0008a9… 2015-10-03 FALSE FALSE NA NA
## 7 0008a9… 2015-10-18 FALSE FALSE NA NA
## 8 0008a9… 2015-10-19 FALSE FALSE NA NA
## 9 0008a9… 2015-10-20 FALSE FALSE NA NA
## 10 0008a9… 2015-10-22 FALSE FALSE 97.7 NA
## # … with 118,577 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[29]]
## # A tibble: 99,219 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 001338… 2015-09-23 TRUE FALSE NA NA
## 2 001338… 2015-09-24 FALSE FALSE NA NA
## 3 001338… 2015-09-25 FALSE FALSE 97.4 NA
## 4 001338… 2015-09-26 FALSE FALSE 97.8 NA
## 5 001338… 2015-09-27 FALSE FALSE 98.1 NA
## 6 001338… 2015-09-28 FALSE FALSE 97.6 NA
## 7 001338… 2015-09-29 FALSE FALSE 97.8 NA
## 8 001338… 2015-09-30 FALSE FALSE 96.9 NA
## 9 001338… 2015-10-01 FALSE FALSE 97.3 NA
## 10 001338… 2015-10-02 FALSE FALSE 97.3 NA
## # … with 99,209 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[30]]
## # A tibble: 59,070 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 005b37… 2015-10-28 TRUE FALSE NA NA
## 2 005b37… 2015-10-29 FALSE FALSE NA NA
## 3 005b37… 2015-10-30 FALSE FALSE NA NA
## 4 005b37… 2015-10-31 FALSE FALSE NA NA
## 5 005b37… 2015-11-01 FALSE FALSE NA NA
## 6 005b37… 2015-11-02 FALSE FALSE 97.9 NA
## 7 005b37… 2015-11-03 FALSE FALSE 97.7 NA
## 8 005b37… 2015-11-25 FALSE FALSE NA NA
## 9 005b37… 2015-11-26 TRUE FALSE NA NA
## 10 005b37… 2015-11-27 FALSE FALSE NA NA
## # … with 59,060 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[31]]
## # A tibble: 30,844 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 01298c… 2015-03-24 TRUE FALSE NA NA
## 2 01298c… 2015-03-25 FALSE FALSE NA NA
## 3 01298c… 2015-04-07 FALSE FALSE NA NA
## 4 01298c… 2015-06-21 TRUE FALSE NA NA
## 5 01298c… 2015-06-22 FALSE FALSE NA NA
## 6 01298c… 2015-06-23 FALSE FALSE NA NA
## 7 01298c… 2015-06-24 FALSE FALSE NA NA
## 8 01298c… 2015-06-25 FALSE FALSE NA NA
## 9 01298c… 2015-07-20 TRUE FALSE NA NA
## 10 01298c… 2015-07-21 FALSE FALSE NA NA
## # … with 30,834 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[32]]
## # A tibble: 70,250 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0005d5… 2016-03-10 TRUE FALSE NA NA
## 2 0005d5… 2016-03-11 FALSE FALSE NA NA
## 3 0005d5… 2016-03-12 FALSE FALSE NA NA
## 4 0005d5… 2016-03-13 FALSE FALSE NA NA
## 5 0005d5… 2016-03-14 FALSE FALSE NA NA
## 6 0005d5… 2016-03-16 FALSE FALSE 98.1 NA
## 7 0005d5… 2016-03-17 FALSE FALSE 96.8 NA
## 8 000c67… 2018-04-22 TRUE FALSE NA NA
## 9 000c67… 2018-04-23 FALSE FALSE NA NA
## 10 000c67… 2018-04-24 FALSE FALSE NA NA
## # … with 70,240 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[33]]
## # A tibble: 44,844 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 006faf… 2016-04-14 TRUE FALSE NA NA
## 2 006faf… 2016-04-15 FALSE FALSE NA NA
## 3 006faf… 2016-04-16 FALSE FALSE NA NA
## 4 006faf… 2016-04-17 FALSE FALSE NA NA
## 5 006faf… 2016-04-20 FALSE FALSE NA NA
## 6 006faf… 2016-04-26 FALSE FALSE NA NA
## 7 006faf… 2016-04-28 FALSE FALSE NA NA
## 8 006faf… 2016-04-29 FALSE FALSE NA NA
## 9 006faf… 2016-05-01 FALSE FALSE NA NA
## 10 006faf… 2016-05-02 FALSE FALSE NA NA
## # … with 44,834 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[34]]
## # A tibble: 109,539 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000d3a… 2019-04-19 TRUE FALSE NA NA
## 2 000d3a… 2019-04-30 FALSE FALSE NA NA
## 3 000d3a… 2019-05-15 TRUE FALSE NA NA
## 4 000d3a… 2019-05-25 FALSE FALSE NA NA
## 5 000d3a… 2019-06-10 TRUE FALSE NA NA
## 6 000d3a… 2019-06-11 FALSE FALSE NA NA
## 7 000d3a… 2019-06-12 FALSE FALSE NA NA
## 8 000d3a… 2019-06-13 FALSE FALSE NA NA
## 9 0020a7… 2016-09-13 TRUE FALSE 97.5 1970-01-01 07:54:00
## 10 0020a7… 2016-09-14 FALSE FALSE 97.5 1970-01-01 07:21:00
## # … with 109,529 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[35]]
## # A tibble: 59,929 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00336d… 2016-11-06 TRUE FALSE NA NA
## 2 00336d… 2016-11-07 FALSE FALSE NA NA
## 3 00336d… 2016-11-08 FALSE FALSE NA NA
## 4 00336d… 2016-11-09 FALSE FALSE NA NA
## 5 00336d… 2016-11-10 FALSE FALSE NA NA
## 6 00336d… 2016-11-30 TRUE FALSE NA NA
## 7 00336d… 2016-12-01 FALSE FALSE NA NA
## 8 00336d… 2016-12-02 FALSE FALSE NA NA
## 9 00336d… 2016-12-03 FALSE FALSE NA NA
## 10 00336d… 2016-12-04 FALSE FALSE NA NA
## # … with 59,919 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[36]]
## # A tibble: 91,435 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000255… 2014-02-05 TRUE FALSE 96 NA
## 2 000255… 2014-02-06 FALSE FALSE 96.5 NA
## 3 000255… 2014-02-07 FALSE FALSE NA NA
## 4 000255… 2014-02-08 FALSE FALSE NA NA
## 5 000255… 2014-02-09 FALSE FALSE NA NA
## 6 000255… 2014-02-10 FALSE FALSE 97.9 NA
## 7 000255… 2014-02-11 FALSE FALSE 96.9 NA
## 8 000255… 2014-02-12 FALSE FALSE 97.5 NA
## 9 000255… 2014-02-13 FALSE FALSE 97.1 NA
## 10 000255… 2014-02-14 FALSE FALSE 97.9 NA
## # … with 91,425 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[37]]
## # A tibble: 66,044 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 003a81… 2015-11-13 TRUE FALSE NA NA
## 2 003a81… 2015-11-14 FALSE FALSE 97.2 NA
## 3 003a81… 2015-11-15 FALSE FALSE 96.8 NA
## 4 003a81… 2015-11-16 FALSE FALSE 98.1 NA
## 5 003a81… 2015-11-17 FALSE FALSE 97.8 NA
## 6 003a81… 2015-11-18 FALSE FALSE 98.8 NA
## 7 003a81… 2015-11-19 FALSE FALSE 98.0 NA
## 8 003a81… 2015-11-20 FALSE FALSE 98.6 NA
## 9 003a81… 2015-11-21 FALSE FALSE 98.9 NA
## 10 003a81… 2015-11-22 FALSE FALSE NA NA
## # … with 66,034 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[38]]
## # A tibble: 109,770 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000416… 2017-02-06 TRUE FALSE 95.0 1970-01-01 06:04:00
## 2 000416… 2017-02-07 FALSE FALSE 94.8 1970-01-01 06:22:00
## 3 000416… 2017-02-08 FALSE FALSE 96.0 1970-01-01 06:16:00
## 4 000416… 2017-02-09 FALSE FALSE 95.4 1970-01-01 06:16:00
## 5 000416… 2017-02-10 FALSE FALSE NA NA
## 6 000416… 2017-02-11 FALSE FALSE NA NA
## 7 000416… 2017-02-12 FALSE FALSE 94.4 1970-01-01 08:28:00
## 8 000416… 2017-02-13 FALSE FALSE 96.1 1970-01-01 06:17:00
## 9 000416… 2017-02-14 FALSE FALSE 95.0 1970-01-01 06:18:00
## 10 000416… 2017-02-15 FALSE FALSE 96.3 1970-01-01 06:22:00
## # … with 109,760 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[39]]
## # A tibble: 42,598 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000d6d… 2018-08-11 TRUE FALSE NA NA
## 2 000d6d… 2018-08-12 FALSE FALSE NA NA
## 3 000d6d… 2018-08-13 FALSE FALSE NA NA
## 4 000d6d… 2018-08-14 FALSE FALSE NA NA
## 5 000d6d… 2018-08-15 FALSE FALSE NA NA
## 6 000d6d… 2018-08-17 FALSE FALSE 98.6 1970-01-01 18:25:00
## 7 000d6d… 2018-08-18 FALSE FALSE 97.2 1970-01-01 07:18:00
## 8 000d6d… 2018-08-19 FALSE FALSE 96.4 1970-01-01 20:48:00
## 9 000d6d… 2018-08-22 FALSE FALSE 97.7 1970-01-01 22:17:00
## 10 000d6d… 2018-08-23 FALSE FALSE 97.6 1970-01-01 19:26:00
## # … with 42,588 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[40]]
## # A tibble: 80,661 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0014ae… 2016-03-08 TRUE FALSE NA NA
## 2 0014ae… 2016-03-09 FALSE FALSE NA NA
## 3 0014ae… 2016-03-10 FALSE FALSE NA NA
## 4 0014ae… 2016-03-11 FALSE FALSE NA NA
## 5 0014ae… 2016-03-12 FALSE FALSE NA NA
## 6 0014ae… 2016-04-04 TRUE FALSE NA NA
## 7 0014ae… 2016-04-05 FALSE FALSE NA NA
## 8 0014ae… 2016-04-06 FALSE FALSE NA NA
## 9 0014ae… 2016-04-07 FALSE FALSE NA NA
## 10 0014ae… 2016-04-08 FALSE FALSE NA NA
## # … with 80,651 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[41]]
## # A tibble: 106,090 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0009fe… 2016-09-15 TRUE FALSE NA NA
## 2 0009fe… 2016-09-16 FALSE FALSE NA NA
## 3 0009fe… 2016-09-17 FALSE FALSE NA NA
## 4 0009fe… 2016-09-18 FALSE FALSE NA NA
## 5 0009fe… 2016-09-19 FALSE FALSE NA NA
## 6 0009fe… 2016-09-20 FALSE FALSE NA NA
## 7 0009fe… 2016-10-14 TRUE FALSE NA NA
## 8 0009fe… 2016-10-15 FALSE FALSE NA NA
## 9 0009fe… 2016-10-16 FALSE FALSE NA NA
## 10 0009fe… 2016-10-17 FALSE FALSE NA NA
## # … with 106,080 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[42]]
## # A tibble: 124,853 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00033c… 2017-07-01 TRUE FALSE NA NA
## 2 00033c… 2017-07-29 TRUE FALSE NA NA
## 3 00033c… 2017-07-30 FALSE FALSE NA NA
## 4 00033c… 2017-07-31 FALSE FALSE NA NA
## 5 00033c… 2017-08-11 TRUE FALSE NA NA
## 6 00033c… 2017-08-25 TRUE FALSE NA NA
## 7 00033c… 2017-08-26 FALSE FALSE NA NA
## 8 00033c… 2017-08-27 FALSE FALSE NA NA
## 9 00033c… 2017-08-28 FALSE FALSE NA NA
## 10 00033c… 2017-08-29 FALSE FALSE NA NA
## # … with 124,843 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[43]]
## # A tibble: 66,191 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 006285… 2016-06-29 TRUE FALSE 97.3 1970-01-01 05:52:00
## 2 006285… 2016-06-30 FALSE FALSE 97.6 1970-01-01 06:41:00
## 3 006285… 2016-07-01 FALSE FALSE NA NA
## 4 006285… 2016-07-02 FALSE FALSE 97.9 1970-01-01 07:59:00
## 5 006285… 2016-07-03 FALSE FALSE NA NA
## 6 006285… 2016-07-04 FALSE FALSE NA NA
## 7 006285… 2016-07-05 FALSE FALSE NA NA
## 8 006285… 2016-07-06 FALSE FALSE 97.8 1970-01-01 07:02:00
## 9 006285… 2016-07-07 FALSE FALSE 97.8 1970-01-01 06:49:00
## 10 006285… 2016-07-08 FALSE FALSE 97.7 1970-01-01 07:21:00
## # … with 66,181 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[44]]
## # A tibble: 121,641 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 001ffc… 2015-03-25 TRUE FALSE 98 NA
## 2 001ffc… 2015-03-26 FALSE FALSE 97.6 NA
## 3 001ffc… 2015-03-27 FALSE FALSE 97.6 NA
## 4 001ffc… 2015-03-28 FALSE FALSE 97.5 NA
## 5 001ffc… 2015-03-29 FALSE FALSE 97.7 NA
## 6 001ffc… 2015-03-30 FALSE FALSE 97.9 NA
## 7 001ffc… 2015-03-31 FALSE FALSE 97.7 NA
## 8 001ffc… 2015-04-01 FALSE FALSE 97.6 NA
## 9 001ffc… 2015-04-02 FALSE FALSE 98 NA
## 10 001ffc… 2015-04-03 FALSE FALSE 97.3 NA
## # … with 121,631 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[45]]
## # A tibble: 26,710 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 002ad2… 2014-09-01 TRUE FALSE NA NA
## 2 002ad2… 2014-09-02 FALSE FALSE NA NA
## 3 002ad2… 2014-09-03 FALSE FALSE NA NA
## 4 002ad2… 2014-09-04 FALSE FALSE 97.7 NA
## 5 002ad2… 2014-09-05 FALSE FALSE NA NA
## 6 002ad2… 2014-09-06 FALSE FALSE 97.4 NA
## 7 002ad2… 2014-09-07 FALSE FALSE 98.0 NA
## 8 002ad2… 2014-09-08 FALSE FALSE 97.6 NA
## 9 002ad2… 2014-09-09 FALSE FALSE 97.2 NA
## 10 002ad2… 2014-09-10 FALSE FALSE 97.8 NA
## # … with 26,700 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[46]]
## # A tibble: 75,428 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 007b9c… 2017-11-12 TRUE FALSE NA NA
## 2 007b9c… 2017-11-13 FALSE FALSE NA NA
## 3 007b9c… 2017-11-14 FALSE FALSE NA NA
## 4 007b9c… 2018-01-01 TRUE FALSE NA NA
## 5 007b9c… 2018-01-02 FALSE FALSE NA NA
## 6 007b9c… 2018-01-03 FALSE FALSE NA NA
## 7 007b9c… 2018-01-27 TRUE FALSE NA NA
## 8 007b9c… 2018-01-28 FALSE FALSE NA NA
## 9 007b9c… 2018-01-29 FALSE FALSE 97.2 1970-01-01 17:44:00
## 10 007b9c… 2018-01-30 FALSE FALSE 97.2 1970-01-01 17:44:00
## # … with 75,418 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[47]]
## # A tibble: 94,737 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 001a02… 2015-12-17 TRUE FALSE NA NA
## 2 001a02… 2015-12-18 FALSE FALSE NA NA
## 3 001a02… 2015-12-19 FALSE FALSE NA NA
## 4 001a02… 2015-12-20 FALSE FALSE NA NA
## 5 001a02… 2015-12-21 FALSE FALSE NA NA
## 6 001a02… 2015-12-22 FALSE FALSE NA NA
## 7 001a02… 2015-12-23 FALSE FALSE NA NA
## 8 001a02… 2015-12-24 FALSE FALSE 96.8 NA
## 9 001a02… 2015-12-25 FALSE FALSE 97.3 NA
## 10 001a02… 2015-12-26 FALSE FALSE 97.8 NA
## # … with 94,727 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[48]]
## # A tibble: 65,067 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0061a0… 2015-08-10 TRUE FALSE NA NA
## 2 0061a0… 2015-08-11 FALSE FALSE NA NA
## 3 0061a0… 2015-08-12 FALSE FALSE NA NA
## 4 0061a0… 2015-08-23 FALSE FALSE NA NA
## 5 0061a0… 2015-08-27 FALSE FALSE 97.7 NA
## 6 0061a0… 2015-08-28 FALSE FALSE 98.1 NA
## 7 0061a0… 2015-08-29 FALSE FALSE 98.1 NA
## 8 0061a0… 2015-08-30 FALSE FALSE 97.9 NA
## 9 0061a0… 2015-08-31 FALSE FALSE 97.7 NA
## 10 0061a0… 2015-09-01 FALSE FALSE 97.5 NA
## # … with 65,057 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[49]]
## # A tibble: 76,908 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 009542… 2016-08-21 TRUE FALSE NA NA
## 2 009542… 2016-08-22 FALSE FALSE NA NA
## 3 009542… 2016-08-23 FALSE FALSE NA NA
## 4 009542… 2016-09-05 FALSE FALSE NA NA
## 5 009542… 2016-09-13 FALSE FALSE NA NA
## 6 009542… 2016-09-16 FALSE FALSE NA NA
## 7 00b4c8… 2015-11-26 TRUE FALSE NA NA
## 8 00b4c8… 2015-11-27 FALSE FALSE NA NA
## 9 00b4c8… 2015-11-28 FALSE FALSE NA NA
## 10 00b4c8… 2015-12-26 TRUE FALSE NA NA
## # … with 76,898 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[50]]
## # A tibble: 54,961 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00b8be… 2017-10-24 TRUE FALSE NA NA
## 2 00b8be… 2017-10-25 FALSE FALSE NA NA
## 3 00b8be… 2017-10-26 FALSE FALSE NA NA
## 4 00b8be… 2017-10-27 FALSE FALSE NA NA
## 5 00b8be… 2017-10-28 FALSE FALSE NA NA
## 6 00b8be… 2017-10-29 FALSE FALSE NA NA
## 7 00b8be… 2017-10-30 FALSE FALSE NA NA
## 8 00b8be… 2017-10-31 FALSE FALSE NA NA
## 9 00b8be… 2017-11-01 FALSE FALSE NA NA
## 10 00b8be… 2017-11-02 FALSE FALSE NA NA
## # … with 54,951 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[51]]
## # A tibble: 38,091 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00b400… 2014-06-24 TRUE FALSE NA NA
## 2 00b400… 2014-06-25 FALSE FALSE NA NA
## 3 00b400… 2014-06-26 FALSE FALSE NA NA
## 4 00b400… 2014-07-21 TRUE FALSE NA NA
## 5 00b400… 2014-07-22 FALSE FALSE NA NA
## 6 00b400… 2014-07-23 FALSE FALSE NA NA
## 7 00b400… 2014-07-24 FALSE FALSE NA NA
## 8 00b400… 2014-07-25 FALSE FALSE NA NA
## 9 00b400… 2014-07-26 FALSE FALSE NA NA
## 10 00b400… 2014-07-27 FALSE FALSE NA NA
## # … with 38,081 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[52]]
## # A tibble: 65,793 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00190b… 2017-06-26 TRUE FALSE NA NA
## 2 00190b… 2017-06-27 FALSE FALSE NA NA
## 3 00190b… 2017-06-28 FALSE FALSE NA NA
## 4 00190b… 2017-06-29 FALSE FALSE NA NA
## 5 00190b… 2017-06-30 FALSE FALSE NA NA
## 6 00190b… 2017-07-01 FALSE FALSE NA NA
## 7 00190b… 2017-07-04 FALSE FALSE NA NA
## 8 00190b… 2017-07-05 FALSE FALSE NA NA
## 9 00190b… 2017-07-10 FALSE FALSE NA NA
## 10 00190b… 2017-07-11 FALSE FALSE NA NA
## # … with 65,783 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[53]]
## # A tibble: 79,901 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00027f… 2016-06-01 TRUE FALSE 98.5 1970-01-01 16:44:00
## 2 00027f… 2016-06-02 FALSE FALSE NA NA
## 3 00027f… 2016-06-03 FALSE FALSE NA NA
## 4 00027f… 2016-06-04 FALSE FALSE NA NA
## 5 00027f… 2016-06-05 FALSE FALSE NA NA
## 6 00027f… 2016-06-06 FALSE FALSE 97.7 1970-01-01 07:19:00
## 7 00027f… 2016-06-07 FALSE FALSE 97.9 1970-01-01 07:22:00
## 8 00027f… 2016-06-08 FALSE FALSE 98.2 1970-01-01 06:45:00
## 9 00027f… 2016-06-09 FALSE FALSE 97.9 1970-01-01 06:44:00
## 10 00027f… 2016-06-10 FALSE FALSE 98.4 1970-01-01 06:40:00
## # … with 79,891 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[54]]
## # A tibble: 102,901 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0091f3… 2015-04-19 TRUE FALSE NA NA
## 2 0091f3… 2015-04-20 FALSE FALSE NA NA
## 3 0091f3… 2015-04-21 FALSE FALSE NA NA
## 4 0091f3… 2015-04-22 FALSE FALSE NA NA
## 5 0091f3… 2015-04-23 FALSE FALSE NA NA
## 6 0091f3… 2015-04-24 FALSE FALSE NA NA
## 7 0091f3… 2015-04-25 FALSE FALSE NA NA
## 8 0091f3… 2015-04-27 FALSE FALSE NA NA
## 9 0091f3… 2015-05-01 FALSE FALSE NA NA
## 10 0091f3… 2015-05-02 FALSE FALSE NA NA
## # … with 102,891 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[55]]
## # A tibble: 45,864 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 005bfc… 2015-04-04 TRUE FALSE NA NA
## 2 005bfc… 2015-04-05 FALSE FALSE NA NA
## 3 005bfc… 2015-04-06 FALSE FALSE NA NA
## 4 005bfc… 2015-04-07 FALSE FALSE NA NA
## 5 005bfc… 2015-04-08 FALSE FALSE NA NA
## 6 005bfc… 2015-04-14 FALSE FALSE NA NA
## 7 005bfc… 2015-05-03 TRUE FALSE NA NA
## 8 005bfc… 2015-05-04 FALSE FALSE NA NA
## 9 005bfc… 2015-05-05 FALSE FALSE NA NA
## 10 005bfc… 2015-05-06 FALSE FALSE NA NA
## # … with 45,854 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[56]]
## # A tibble: 64,105 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 0047a0… 2018-04-14 TRUE FALSE NA NA
## 2 0047a0… 2018-04-15 FALSE FALSE NA NA
## 3 0047a0… 2018-04-16 FALSE FALSE NA NA
## 4 0047a0… 2018-04-17 FALSE FALSE NA NA
## 5 0047a0… 2018-04-18 FALSE FALSE NA NA
## 6 0047a0… 2018-04-21 FALSE FALSE NA NA
## 7 0047a0… 2018-04-24 FALSE FALSE NA NA
## 8 0047a0… 2018-04-25 FALSE FALSE NA NA
## 9 0047a0… 2018-04-26 FALSE FALSE NA NA
## 10 0047a0… 2018-04-27 FALSE FALSE NA NA
## # … with 64,095 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[57]]
## # A tibble: 108,004 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000117… 2016-11-15 TRUE FALSE NA NA
## 2 000117… 2016-11-16 FALSE FALSE 97.0 1970-01-01 06:05:00
## 3 000117… 2016-11-17 FALSE FALSE 97.2 1970-01-01 06:09:00
## 4 000117… 2016-11-18 FALSE FALSE 97.0 1970-01-01 06:10:00
## 5 000117… 2016-11-19 FALSE FALSE 97.3 1970-01-01 07:57:00
## 6 000117… 2016-11-20 FALSE FALSE 97.5 1970-01-01 08:01:00
## 7 000117… 2016-11-21 FALSE FALSE 97.0 1970-01-01 07:05:00
## 8 000117… 2016-11-22 FALSE FALSE 96.9 1970-01-01 07:33:00
## 9 000117… 2016-11-23 FALSE FALSE 97.1 1970-01-01 06:09:00
## 10 000117… 2016-11-24 FALSE FALSE 97.1 1970-01-01 07:39:00
## # … with 107,994 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[58]]
## # A tibble: 69,189 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00a812… 2015-03-14 TRUE FALSE NA NA
## 2 00a812… 2015-03-15 FALSE FALSE NA NA
## 3 00a812… 2015-03-16 FALSE FALSE NA NA
## 4 00a812… 2015-03-17 FALSE FALSE NA NA
## 5 00a812… 2015-03-18 FALSE FALSE NA NA
## 6 00a812… 2015-03-21 FALSE FALSE NA NA
## 7 00a812… 2015-03-23 FALSE FALSE NA NA
## 8 00a812… 2015-04-09 FALSE FALSE NA NA
## 9 00a812… 2015-04-12 TRUE FALSE NA NA
## 10 00a812… 2015-04-13 FALSE FALSE NA NA
## # … with 69,179 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[59]]
## # A tibble: 94,125 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000c6d… 2017-07-15 TRUE FALSE NA NA
## 2 000c6d… 2017-07-16 FALSE FALSE NA NA
## 3 000c6d… 2017-07-17 FALSE FALSE NA NA
## 4 000c6d… 2017-07-18 FALSE FALSE NA NA
## 5 000c6d… 2017-08-12 FALSE FALSE NA NA
## 6 000c6d… 2017-08-28 FALSE FALSE NA NA
## 7 000c6d… 2017-09-02 TRUE FALSE NA NA
## 8 000c6d… 2017-09-03 FALSE FALSE NA NA
## 9 000c6d… 2017-09-04 FALSE FALSE NA NA
## 10 000c6d… 2017-09-05 FALSE FALSE NA NA
## # … with 94,115 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[60]]
## # A tibble: 120,286 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000864… 2014-08-24 TRUE FALSE NA NA
## 2 000864… 2014-08-25 FALSE FALSE NA NA
## 3 000864… 2014-08-26 FALSE FALSE NA NA
## 4 0014b8… 2015-01-27 TRUE TRUE NA NA
## 5 0014b8… 2015-02-01 FALSE FALSE NA NA
## 6 0014b8… 2015-02-02 FALSE FALSE NA NA
## 7 0014b8… 2015-02-03 FALSE FALSE 97.2 NA
## 8 0014b8… 2015-02-04 FALSE FALSE 97.5 NA
## 9 0014b8… 2015-02-05 FALSE FALSE 97.5 NA
## 10 0014b8… 2015-02-06 FALSE FALSE 97.5 NA
## # … with 120,276 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[61]]
## # A tibble: 73,012 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 003402… 2015-09-07 TRUE FALSE NA NA
## 2 003402… 2015-09-08 FALSE FALSE NA NA
## 3 003402… 2015-09-09 FALSE FALSE NA NA
## 4 003402… 2015-09-10 FALSE FALSE NA NA
## 5 003402… 2015-10-14 TRUE FALSE NA NA
## 6 003402… 2015-10-15 FALSE FALSE NA NA
## 7 003402… 2015-10-16 FALSE FALSE NA NA
## 8 003402… 2015-10-17 FALSE FALSE NA NA
## 9 003402… 2015-11-17 TRUE FALSE NA NA
## 10 003402… 2015-11-18 FALSE FALSE NA NA
## # … with 73,002 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[62]]
## # A tibble: 111,849 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 00017c… 2016-12-15 TRUE FALSE NA NA
## 2 00017c… 2016-12-16 FALSE FALSE NA NA
## 3 00017c… 2016-12-17 FALSE FALSE NA NA
## 4 00017c… 2016-12-18 FALSE FALSE NA NA
## 5 00017c… 2016-12-19 FALSE FALSE NA NA
## 6 00017c… 2016-12-20 FALSE FALSE NA NA
## 7 00017c… 2016-12-24 FALSE FALSE NA NA
## 8 00017c… 2016-12-27 FALSE FALSE NA NA
## 9 00017c… 2016-12-30 FALSE FALSE NA NA
## 10 00017c… 2017-01-03 FALSE FALSE NA NA
## # … with 111,839 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[63]]
## # A tibble: 40,759 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000230… 2014-09-01 FALSE FALSE NA NA
## 2 00354b… 2015-12-15 TRUE FALSE NA NA
## 3 00354b… 2015-12-16 FALSE FALSE NA NA
## 4 00354b… 2015-12-17 FALSE FALSE NA NA
## 5 00354b… 2015-12-18 FALSE FALSE NA NA
## 6 00354b… 2015-12-19 FALSE FALSE NA NA
## 7 00354b… 2015-12-20 FALSE FALSE NA NA
## 8 00354b… 2016-01-13 TRUE FALSE NA NA
## 9 00354b… 2016-01-14 FALSE FALSE NA NA
## 10 00354b… 2016-01-15 FALSE FALSE NA NA
## # … with 40,749 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
##
## [[64]]
## # A tibble: 45,786 x 37
## user_id date first_day conception temperature temp_time
## <chr> <date> <lgl> <lgl> <dbl> <dttm>
## 1 000628… 2016-06-18 TRUE FALSE NA NA
## 2 000628… 2016-06-19 FALSE FALSE NA NA
## 3 000628… 2016-06-20 FALSE FALSE NA NA
## 4 000628… 2016-06-21 FALSE FALSE NA NA
## 5 000628… 2016-06-22 FALSE FALSE NA NA
## 6 000628… 2016-06-23 FALSE FALSE NA NA
## 7 000628… 2016-06-24 FALSE FALSE NA NA
## 8 000628… 2016-06-25 FALSE FALSE NA NA
## 9 000628… 2016-06-26 FALSE FALSE NA NA
## 10 000628… 2016-06-27 FALSE FALSE NA NA
## # … with 45,776 more rows, and 31 more variables: temp_source <int>,
## # questionable_temp <lgl>, no_fluid <lgl>, fluid_sticky <int>,
## # fluid_creamy <int>, fluid_eggwhite <int>, fluid_watery <int>,
## # cervix_height <int>, cervix_openness <int>, cervix_firmness <int>,
## # opk <int>, preg_test <dbl>, menstruation <int>, spotting <lgl>,
## # sex <int>, custom <chr>, moods <chr>, symptoms <chr>,
## # preg_test_o <int>, input_file_id <chr>, batch <dbl>,
## # is_first_day <lgl>, first_day_type <dbl>, day_id <chr>,
## # cycle_nb <dbl>, cycle_id <fct>, cycle_length <dbl>, cycleday <dbl>,
## # cycleday_from_end <dbl>, fertility_counting <dbl>,
## # fertility_counting_n <dbl>
# NOTE(review): if the preceding chunk created its cluster with makeCluster(),
# stopImplicitCluster() does not shut it down -- confirm against that chunk.
stopImplicitCluster()

#' Score how the sex logged in a cycle relates to the estimated fertility.
#'
#' The score is the sum of four terms:
#'   * `fertility_n` summed over the days where `sex == 2`         (weight +1)
#'   * `fertility` summed over the days where `sex == 1`           (weight -1)
#'   * `fertility` summed over the days where `sex == 3`           (weight -0.5)
#'   * a flat bonus of 2 if any day has `sex == 4`
#' Elsewhere in this analysis the sex codes are counted as 1 = protected sex,
#' 2 = unprotected sex, 3 = withdrawal and 4 = insemination.
#'
#' @param fertility Numeric vector with one value per day; in this script it
#'   receives the `fertility_counting` column of a single cycle.
#' @param fertility_n Numeric vector of the same length; in this script it
#'   receives the `fertility_counting_n` column of a single cycle.
#' @param sex Integer-coded vector of the same length.
#' @return A single numeric score. Missing values are not removed, so an `NA`
#'   in the inputs can propagate to the result.
reproductive_obj_score = function(fertility, fertility_n, sex){
  unprotected_term  = sum(fertility_n * (sex == 2))
  protected_term    = -1 * sum(fertility * (sex == 1))
  withdrawal_term   = -0.5 * sum(fertility * (sex == 3))
  insemination_term = 2 * any(sex == 4)
  # Terms are added in the same order as before to keep results bit-identical.
  unprotected_term + protected_term + withdrawal_term + insemination_term
}
# Compute one reproductive-objective score per cycle from the day-level files,
# processing the files in parallel, then attach the score to `cycles`.
tmp_folder = paste0(IO$tmp_data, "Days_with_fertility/")
days_files = list.files(tmp_folder)
cl = makeCluster(par$n_cores)
registerDoParallel(cl)
cycles_agg = foreach(file = days_files, .combine = rbind, .packages = c('plyr','dplyr','feather')) %dopar% {
  days = read_feather(path = paste0(tmp_folder,file))
  # One output row per cycle_id found in this file.
  cycles_agg = ddply(days,
                     "cycle_id",
                     summarise,
                     reprod_obj_score = reproductive_obj_score(fertility_counting, fertility_counting_n, sex))
  return(cycles_agg)
}
# The workers were started explicitly with makeCluster(), so they have to be
# released with stopCluster(); stopImplicitCluster() only stops a cluster that
# registerDoParallel() created on its own and would leave these running.
stopCluster(cl)
# Cycles absent from cycles_agg get NA through match().
cycles$reprod_obj_score_counting = cycles_agg$reprod_obj_score[match(cycles$cycle_id, cycles_agg$cycle_id)]
write_feather(cycles, path = paste0(IO$output_data, "cycles.feather"))
# Keep a copy of cycles.feather under a name that records the scores it holds.
ok = file.copy(from = paste0(IO$output_data, "cycles.feather") , to = paste0(IO$tmp_data, "cycles_with_reprod_obj_scores.feather"), overwrite = TRUE)

# Per pregnancy_id: median of the per-cycle scores (rows with NA are dropped
# by the formula interface of aggregate()).
score_agg = aggregate(reprod_obj_score_counting ~ pregnancy_id , cycles, median, na.rm = TRUE)
pregnancies$reprod_obj_score_counting = score_agg$reprod_obj_score_counting[match(pregnancies$pregnancy_id, score_agg$pregnancy_id)]
# Per pregnancy_id: total of n_tot_sex over the same cycles.
sex_agg = aggregate(n_tot_sex ~ pregnancy_id, cycles, sum, na.rm = TRUE)
pregnancies$n_tot_sex = sex_agg$n_tot_sex[match(pregnancies$pregnancy_id, sex_agg$pregnancy_id)]
# Discretise the score: (-Inf, -0.1] -> avoid_preg, (-0.1, 0.1] -> unknown,
# (0.1, Inf) -> get_preg. NA scores stay NA.
pregnancies$reprod_obj_counting = cut(pregnancies$reprod_obj_score_counting,
                                      breaks = c(-Inf, -0.1,0.1,Inf),
                                      labels = c("avoid_preg","unknown","get_preg"))
ggplot(pregnancies, aes(x = reprod_obj_score_counting, fill = reprod_obj_counting))+
  geom_histogram(binwidth = 0.1)## Warning: Removed 30854 rows containing non-finite values (stat_bin).
write_feather(pregnancies, path = paste0(IO$output_data,"pregnancies.feather"))
file.copy(from = paste0(IO$output_data,"pregnancies.feather"), to = paste0(IO$tmp_data,"pregnancies_with_3_cycles_score.feather") , overwrite = TRUE)## [1] TRUE
# Distribution of the non-zero scores, one panel per app-declared objective.
ggplot(pregnancies[which(pregnancies$reprod_obj_score_counting != 0),],
       aes(x = reprod_obj_score_counting, fill = reprod_obj_counting))+
  geom_histogram(position = "identity", binwidth = 0.1, alpha = 1) +
  facet_grid(reprod_obj_app ~ . , scale = "free")+
  geom_vline(xintercept = 0)+
  xlim(c(-3,3))## Warning: Removed 364 rows containing non-finite values (stat_bin).
## Warning: Removed 30 rows containing missing values (geom_bar).
# Pregnancy duration (7-day bins), one panel per score-based label.
ggplot(pregnancies, aes(x = preg_duration, fill = reprod_obj_counting) )+
  geom_histogram(position = "identity",binwidth = 7, alpha = 0.5) +
  facet_grid(reprod_obj_counting ~ . , scale = "free")+
  xlim(0,1000)## Warning: Removed 93929 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
# Pregnancy outcome counts, one panel per score-based label.
ggplot(pregnancies, aes(x = preg_outcome, fill = reprod_obj_counting) )+
  geom_bar() +
  facet_grid(reprod_obj_counting ~ . , scale = "free")
Here again, it seems that the users who are trying to avoid pregnancy have proportionally fewer induced or spontaneous pregnancy losses.
The 3 previous cycles may not reflect the user's intentions in the cycle in which they became pregnant, as they may have changed their mind and become pregnant quite rapidly.
However, we may be able to infer that they were trying to avoid a pregnancy if they logged protected sex or withdrawal before logging their first positive pregnancy test. If they were, these users may have become pregnant because they mis-estimated their fertile window.
# For every cycle that has a pregnancy outcome, count the logged sex of each
# type on the days before the first positive pregnancy test, split by whether
# fertile mucus (egg-white or watery fluid) was logged on the same day.
cycle_ids = cycles$cycle_id[!is.na(cycles$preg_outcome)]
input_days_folder = paste0(IO$output_data,"Days/")
tic()
cl = makeCluster(par$n_cores)
registerDoParallel(cl)
days_files = list.files(input_days_folder)
cycles_agg = foreach(file = days_files, .combine = rbind, .packages = c('plyr','dplyr','feather')) %dopar% {
  days = read_feather(path = paste0(input_days_folder,file))
  # Missing fluid observations count as "no fertile mucus".
  days$fluid_eggwhite[is.na(days$fluid_eggwhite)] = 0
  days$fluid_watery[is.na(days$fluid_watery)] = 0
  days$fertile_mucus = days$fluid_eggwhite + days$fluid_watery
  # Keep only the cycles of interest.
  j = which(days$cycle_id %in% cycle_ids)
  days = days[j,]
  # Keep only the days strictly before the first positive pregnancy test.
  days_first_pos_preg_test = cycles$day_first_pos_preg_test[match(days$cycle_id, cycles$cycle_id)]
  j = which(days$cycleday < days_first_pos_preg_test)
  cycles_agg = ddply(days[j,],
                     "cycle_id",
                     summarise,
                     n_prot_sex_no_mucus = sum((sex == 1) & (fertile_mucus == 0), na.rm = TRUE),
                     n_withdrawal_no_mucus = sum((sex == 3) & (fertile_mucus == 0), na.rm = TRUE),
                     n_unprot_sex_no_mucus = sum((sex == 2) & (fertile_mucus == 0), na.rm = TRUE),
                     n_insemination_no_mucus = sum((sex == 4) & (fertile_mucus == 0), na.rm = TRUE),
                     n_prot_sex_mucus = sum((sex == 1) & (fertile_mucus > 0), na.rm = TRUE),
                     n_withdrawal_mucus = sum((sex == 3) & (fertile_mucus > 0), na.rm = TRUE),
                     n_unprot_sex_mucus = sum((sex == 2) & (fertile_mucus > 0), na.rm = TRUE),
                     n_insemination_mucus = sum((sex == 4) & (fertile_mucus > 0), na.rm = TRUE))
  return(cycles_agg)
}
# The workers were started explicitly with makeCluster(), so they have to be
# released with stopCluster(); stopImplicitCluster() only stops a cluster that
# registerDoParallel() created on its own and would leave these running.
stopCluster(cl)
toc()## 153.937 sec elapsed
# Reshape to long format (one row per cycle and count variable) so that every
# count gets its own histogram panel.
cycles_agg_long = melt(cycles_agg, id.vars = "cycle_id")
# The lower x limit of 0.2 removes the zero counts from the histograms.
ggplot(cycles_agg_long, aes(x = value, fill = variable)) +
  geom_histogram() +
  facet_grid(variable ~ ., scale = "free") +
  xlim(c(0.2,30))## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 572967 rows containing non-finite values (stat_bin).
## Warning: Removed 16 rows containing missing values (geom_bar).
#table(pmin(5, cycles_agg$n_prot_sex), pmin(10, cycles_agg$n_unprot_sex))

# Copy every count column of cycles_agg onto the pregnancies table, matching
# pregnancy_id against cycle_id; pregnancies without a match get NA.
m = match(pregnancies$pregnancy_id, cycles_agg$cycle_id)
colnames_to_add = colnames(cycles_agg[,-1])
for(col_ in colnames_to_add){
  # `[[` assigns by column name directly, no code string has to be built.
  pregnancies[[col_]] = cycles_agg[[col_]][m]
}

# Negative score: protected sex and withdrawal, weighted more heavily when
# fertile mucus was logged on the same day.
pregnancies$reprod_obj_score_in_cycle = 0 +
  - 2*pregnancies$n_prot_sex_mucus +
  - 1*pregnancies$n_prot_sex_no_mucus +
  - 1 * pregnancies$n_withdrawal_mucus +
  - 0.5 * pregnancies$n_withdrawal_no_mucus
# Overrides, applied in this order: more than one unprotected sex with fertile
# mucus -> 2; any insemination -> 10; no counts available (NA) -> 0.
pregnancies$reprod_obj_score_in_cycle[pregnancies$n_unprot_sex_mucus > 1] = 2
pregnancies$reprod_obj_score_in_cycle[(pregnancies$n_insemination_no_mucus + pregnancies$n_insemination_mucus)>0] = 10
pregnancies$reprod_obj_score_in_cycle[is.na(pregnancies$reprod_obj_score_in_cycle)] = 0
# Discretise: (-Inf, -0.5] -> avoid_preg, (-0.5, 0.5] -> unknown,
# (0.5, Inf) -> get_preg.
pregnancies$reprod_obj_in_cycle = cut(pregnancies$reprod_obj_score_in_cycle, breaks = c(-Inf, -0.5, 0.5, Inf),
                                      labels = c("avoid_preg","unknown","get_preg"))

# Negative scores only, one panel per app-declared objective.
ggplot(pregnancies[pregnancies$reprod_obj_score_in_cycle <0,], aes(x = reprod_obj_score_in_cycle, fill = reprod_obj_app))+
  geom_histogram(binwidth = 0.5)+
  facet_grid(reprod_obj_app ~. , scale = "free")

table(pregnancies$reprod_obj_in_cycle, pregnancies$reprod_obj_app)##
## avoid_preg get_preg other preg track_period
## avoid_preg 934 1277 11 2890 775
## unknown 7818 33038 206 100661 9207
## get_preg 2159 4607 23 11000 2320
# Pregnancy duration (7-day bins), one panel per in-cycle label.
ggplot(pregnancies, aes(x = preg_duration, fill = reprod_obj_in_cycle) )+
  geom_histogram(position = "identity",binwidth = 7, alpha = 0.5) +
  facet_grid(reprod_obj_in_cycle ~ . , scale = "free")+
  xlim(0,1000)## Warning: Removed 93929 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_bar).
# Pregnancy outcome counts, one panel per in-cycle label.
ggplot(pregnancies, aes(x = preg_outcome, fill = reprod_obj_in_cycle) )+
  geom_bar() +
  facet_grid(reprod_obj_in_cycle ~ . , scale = "free")
If a user declared not wanting to get pregnant and logged sexual intercourse in the 3 previous cycles and in the conception cycle that was consistent with this declared objective, we can assume that this user was indeed trying to avoid pregnancy.
If, at the other end of the spectrum, a user declared that they wanted to achieve pregnancy and either acted consistently with that objective or received insemination, we can safely assume that they wanted to get pregnant.
In all other cases, we label the user's reproductive objective as “unknown” for that specific pregnancy.
# Final per-pregnancy label. Everything starts as "unknown" and is upgraded
# only when the app-declared objective, the score from the earlier cycles and
# the in-cycle score all point the same way. A missing earlier-cycle label is
# tolerated when cycle_nb < 4.
pregnancies$reprod_obj = "unknown"

# Declared "avoid_preg", earlier cycles agree (or are missing with
# cycle_nb < 4), and the in-cycle label does not say "get_preg".
j = with(pregnancies, which(
  (reprod_obj_app == "avoid_preg") &
    ((reprod_obj_counting == "avoid_preg") |
       (is.na(reprod_obj_counting) & (cycle_nb < 4))) &
    (reprod_obj_in_cycle %in% c("avoid_preg","unknown"))
))
pregnancies$reprod_obj[j] = "avoid_preg"

# Declared "get_preg" or "preg", earlier cycles agree (or are missing with
# cycle_nb < 4), and the in-cycle label does not say "avoid_preg".
j = with(pregnancies, which(
  (reprod_obj_app %in% c("get_preg","preg")) &
    ((reprod_obj_counting == "get_preg") |
       (is.na(reprod_obj_counting) & (cycle_nb < 4))) &
    (reprod_obj_in_cycle %in% c("get_preg","unknown"))
))
pregnancies$reprod_obj[j] = "get_preg"

# Frequency of the final label and of each of its three ingredients.
table(pregnancies$reprod_obj)##
## avoid_preg get_preg unknown
## 3809 71902 101215
table(pregnancies$reprod_obj_app)##
## avoid_preg get_preg other preg track_period
## 10911 38922 240 114551 12302
table(pregnancies$reprod_obj_counting)##
## avoid_preg unknown get_preg
## 14456 75303 56313
table(pregnancies$reprod_obj_in_cycle)##
## avoid_preg unknown get_preg
## 5887 150930 20109
# Save the labelled table and keep a copy of this stage in the tmp folder.
write_feather(pregnancies, path = paste0(IO$output_data,"pregnancies.feather"))
file.copy(from = paste0(IO$output_data,"pregnancies.feather"), to = paste0(IO$tmp_data,"pregnancies_with_reprod_obj.feather") , overwrite = TRUE)## [1] TRUE
# Give every user the label of their first pregnancy. The lookup key is built
# as "<user_id>_<first_cycle_preg>" and matched against pregnancy_id; users
# without a matching pregnancy get NA.
users$reprod_obj_at_first_pregnancy = pregnancies$reprod_obj[
  match(
    x = paste0(users$user_id, "_",users$first_cycle_preg),
    table = pregnancies$pregnancy_id
  )
]
# Cross-tabulate against the objective declared in the app.
table(users$reprod_obj_at_first_pregnancy, users$reprod_obj_app)##
## avoid_preg get_preg other preg track_period
## avoid_preg 3415 0 0 0 0
## get_preg 0 17542 0 39159 0
## unknown 5182 14438 199 46492 9138
# Save the users table and keep a copy of this stage in the tmp folder.
write_feather(users, path = paste0(IO$output_data,"users.feather"))
file.copy(from = paste0(IO$output_data,"users.feather"), to = paste0(IO$tmp_data,"users_with_reprod_obj_at_first_pregnancy.feather"), overwrite = TRUE)## [1] TRUE
# Pregnancy duration (7-day bins), one panel per final label.
ggplot(pregnancies, aes(x = preg_duration, fill = reprod_obj) )+
  geom_histogram(position = "identity",binwidth = 7, alpha = 0.5) +
  facet_grid(reprod_obj ~ . , scale = "free")+
  xlim(0,1000)## Warning: Removed 93929 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_bar).
# Pregnancy outcome counts, one panel per final label.
ggplot(pregnancies, aes(x = preg_outcome, fill = reprod_obj) )+
  geom_bar() +
  facet_grid(reprod_obj ~ . , scale = "free")

knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

# Reload the users and cycles tables from the output folder.
users = read_feather(path = paste0(IO$output_data, "users.feather"))
cycles = read_feather(path = paste0(IO$output_data, "cycles.feather"))
#load(paste0(IO$output_data, "days.Rdata"), verbose = TRUE)

# Cycle-length density (1-day bins, 0-750 days), one panel per
# pregnancy-test class.
g = ggplot(cycles, aes(x = cycle_length, fill = preg_test_class)) +
  geom_histogram(aes(y = ..density..), binwidth = 1, position = "identity")+
  xlim(c(0,750))+
  facet_grid(preg_test_class ~ ., scale = "free")
g## Warning: Removed 142683 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
# Same density restricted to 0-150 days, leaving out the "pregnant" class and
# overlaying the remaining classes in a single panel.
g = ggplot(cycles[cycles$preg_test_class != "pregnant",], aes(x = cycle_length, fill = preg_test_class)) +
  geom_histogram(aes(y = ..density..), binwidth = 1, position = "identity", alpha = 0.5)+
  xlim(c(0,150))
#facet_grid(preg_test_class ~ .)
g## Warning: Removed 71625 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).
# Long-range view: 7-day bins over 0 to 20*28 days. Axis breaks sit at
# multiples of 28 days and are labelled in weeks; the dashed gray lines mark
# the durations stored in dict$pregnancy_timeline.
g_hist_lt = ggplot(data = cycles, mapping = aes(x = cycle_length, fill = preg_test_class)) +
  geom_vline(xintercept = dict$pregnancy_timeline$duration_in_days, col = "gray", linetype = 2) +
  geom_histogram(mapping = aes(y = ..density..), binwidth = 7, position = "identity", alpha = 0.5) +
  scale_x_continuous(
    breaks = viz$xaxis_m*28,
    minor_breaks = viz$xaxis_s*7,
    labels = viz$xaxis_m*4,
    limits = c(0,20*28)
  ) +
  labs(y = "% of cycles", x = "cycle or pregnancy duration (in weeks)") +
  scale_fill_discrete(name = "Cycle label") +
  theme(legend.position = "bottom")
g_hist_lt## Warning: Removed 155328 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
# Short-range view: 1-day bins over 0 to 150 days, without a fill legend.
g_hist_st = ggplot(data = cycles, mapping = aes(x = cycle_length, fill = preg_test_class)) +
  geom_vline(xintercept = dict$pregnancy_timeline$duration_in_days, col = "gray", linetype = 2) +
  geom_histogram(mapping = aes(y = ..density..), binwidth = 1, position = "identity", alpha = 0.5) +
  scale_x_continuous(
    breaks = viz$xaxis_m*28,
    minor_breaks = viz$xaxis_s*7,
    labels = viz$xaxis_m*4,
    limits = c(0,150)
  ) +
  labs(y = "% of cycles", x = "cycle or pregnancy duration (in weeks)") +
  guides(fill = FALSE)
#facet_grid(preg_test_class ~ .)
g_hist_st## Warning: Removed 209196 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_vline).
## Warning: Removed 8 rows containing missing values (geom_bar).
# Turn the short-range plot into a grob (with a gray frame) so it can be
# embedded in the long-range plot.
g_inset = ggplotGrob(
  g_hist_st + theme(plot.background = element_rect(colour = "gray40"))
)## Warning: Removed 209196 rows containing non-finite values (stat_bin).
## Warning: Removed 6 rows containing missing values (geom_vline).
## Warning: Removed 8 rows containing missing values (geom_bar).
# Place the inset to the right of x = 24*7 days and above y = 0.03.
g_hist_lt +
  annotation_custom(grob = g_inset, xmin = 24*7, xmax = Inf, ymin = 0.03, ymax = Inf)## Warning: Removed 155328 rows containing non-finite values (stat_bin).
## Warning: Removed 8 rows containing missing values (geom_bar).
# Copy each user's reprod_obj value onto their cycles (matched on user_id).
cycles$reprod_obj = users$reprod_obj[match(cycles$user_id, users$user_id)]

# Long-range view (7-day bins, 0 to 20*28 days) of cycle length for cycles
# with a non-missing preg_test_class, filled by reprod_obj and faceted by
# preg_test_class.
g_hist_lt = ggplot(cycles[!is.na(cycles$preg_test_class),], aes(x = cycle_length, fill = reprod_obj )) +
  geom_vline(xintercept = dict$pregnancy_timeline$duration_in_days, col = "gray", linetype = 2) +
  geom_histogram(aes(y = ..density..), binwidth = 7, position = "identity", alpha = 0.5) +
  scale_x_continuous(breaks = viz$xaxis_m*28, minor_breaks = viz$xaxis_s*7, labels = viz$xaxis_m*4, limits = c(0, 20*28)) +
  ylab("% of cycles") + xlab("cycle or pregnancy duration (in weeks)") +
  # NOTE(review): the fill is reprod_obj here, yet the legend title still
  # reads "Cycle label" - confirm this is intended.
  scale_fill_discrete(name = "Cycle label") + theme(legend.position = "bottom") +
  facet_grid(preg_test_class~.)
g_hist_lt

# Short-range view (1-day bins, 0-150 days) with the same fill and facets.
g_hist_st = ggplot(cycles[!is.na(cycles$preg_test_class),], aes(x = cycle_length, fill = reprod_obj)) +
  geom_vline(xintercept = dict$pregnancy_timeline$duration_in_days, col = "gray", linetype = 2) +
  geom_histogram(aes(y = ..density..), binwidth = 1, position = "identity", alpha = 0.5) +
  scale_x_continuous(breaks = viz$xaxis_m*28, minor_breaks = viz$xaxis_s*7, labels = viz$xaxis_m*4, limits = c(0, 150)) +
  ylab("% of cycles") + xlab("cycle or pregnancy duration (in weeks)") +
  facet_grid(preg_test_class ~ .)
g_hist_st

# Short-range plot, framed in gray40, as a grob for the inset.
g_inset = ggplotGrob(g_hist_st +
  theme(plot.background = element_rect(colour = "gray40")))

# Long-range plot with the short-range plot inset.
g_hist_lt + annotation_custom(
  grob = g_inset,
  xmin = 24*7,
  xmax = Inf,
  ymin = 0.03,
  ymax = Inf
)

# Row indices of the cycles classified as "pregnant". This statement was
# fused onto the closing parenthesis above, which does not parse.
j = which(cycles$preg_test_class == "pregnant")
# Bar chart of the share of cycles (rows `j` of `cycles`) per preg_outcome,
# coloured by preg_outcome_cat; the y axis is formatted as percentages.
ggplot(cycles[j,], aes(x = preg_outcome, fill = preg_outcome_cat)) +
  geom_bar(aes(y = (..count..)/sum(..count..))) +
  xlab("Pregnancy outcome") + ylab("% cycles") +
  scale_fill_manual(values = dict$pregnancy_outcomes$colors) +
  scale_y_continuous(labels=percent) +
  # "none" hides the legend; guides(fill = FALSE) is deprecated in ggplot2.
  guides(fill = "none")

# Reload the users and cycles tables. These statements start a new chunk and
# were fused onto the plot expression above, which does not parse.
users = read_feather(path = paste0(IO$output_data, "users.feather"))
cycles = read_feather(path = paste0(IO$output_data, "cycles.feather"))
colnames(cycles)
## [1] "user_id"
## [2] "start_date"
## [3] "first_day_type"
## [4] "cycle_nb"
## [5] "cycle_id"
## [6] "end_date"
## [7] "cycle_length"
## [8] "n_days_obs"
## [9] "last_obs_day"
## [10] "n_pos_preg_test"
## [11] "n_neg_preg_test"
## [12] "day_from_end_first_pos_preg_test"
## [13] "day_last_pos_preg_test"
## [14] "day_last_preg_test"
## [15] "n_tot_sex"
## [16] "n_prot_sex"
## [17] "n_unprot_sex"
## [18] "n_withdrawal"
## [19] "n_insemination"
## [20] "n_BBT"
## [21] "day_first_pos_preg_test"
## [22] "n_days_obs_after_first_pos_preg_test"
## [23] "last_preg_test"
## [24] "preg_test_class"
## [25] "preg_type"
## [26] "birth_year"
## [27] "current_age"
## [28] "reprod_obj_app"
## [29] "preg_outcome_based_on_duration"
## [30] "cycle_length_next_cycle"
## [31] "preg_outcome_cat"
## [32] "preg_outcome"
## [33] "cycle_nb_from_next_preg"
## [34] "pregnancy_id"
## [35] "reprod_obj_score_counting"
# Attach to every cycle the number of the user's first cycle with a positive
# pregnancy test (users$first_cycle_preg).
cycles$cycle_nb_first_pos_preg_test = users$first_cycle_preg[match(cycles$user_id, users$user_id)]

# Flag the cycles numbered 1 to 4 below that first positive-test cycle.
cycles$cycles_before_preg_4 = with(
  cycles,
  (cycle_nb >= (cycle_nb_first_pos_preg_test - 4)) &
    (cycle_nb <= (cycle_nb_first_pos_preg_test - 1))
)
# Unflag every cycle of users whose first positive-test cycle number is below 5.
cycles$cycles_before_preg_4[which(cycles$cycle_nb_first_pos_preg_test < 5)] = FALSE

# Per-user mean, median and standard deviation of cycle length over the
# flagged cycles.
users_cycles_stats = ddply(
  .data = cycles[which(cycles$cycles_before_preg_4), ],
  .variables = .(user_id),
  .fun = summarize,
  avg_cycle_length_4 = mean(cycle_length, na.rm = TRUE),
  median_cycle_length_4 = median(cycle_length, na.rm = TRUE),
  sd_cycle_length_4 = sd(cycle_length, na.rm = TRUE)
)
head(users_cycles_stats)
## user_id avg_cycle_length_4
## 1 00011775cedac39f02b5cf431b2b2b4df8cf1fa7 32.25
## 2 00016999bb7faafc7c29c14dc63c436d1e2ca280 25.75
## 3 00027fe63a06d113e3cfb608c21281315127dbe2 31.00
## 4 00033c627144e7585875d7c3fe3bbe9459fc3a3b 28.25
## 5 000416061bb0401fdf710c414968f48de041e2c1 34.25
## 6 0005161ad07f63eaa0c98e59537a83fe2a4c44eb 30.25
## median_cycle_length_4 sd_cycle_length_4
## 1 32.0 2.2173558
## 2 25.5 0.9574271
## 3 30.5 1.4142136
## 4 28.0 1.2583057
## 5 34.0 2.9860788
## 6 31.0 3.8622101
# Copy into `users` every statistics column it does not have yet, aligned on
# user_id; users without a row in users_cycles_stats get NA.
# setdiff() replaces `x[-which(...)]`, which returns an empty vector when
# nothing matches. `[[` replaces eval(parse(text = ...)) for accessing columns
# by a name held in a variable.
column_names = setdiff(colnames(users_cycles_stats), colnames(users))
m = match(users$user_id, users_cycles_stats$user_id)
for(column in column_names){
  users[[column]] = users_cycles_stats[[column]][m]
  #users[[column]][is.na(users[[column]])] = 0
}
# Outcome of each user's first pregnancy, read from the cycle whose number
# equals the user's first positive-test cycle number.
users_first_preg_outcome = cycles[
  which(cycles$cycle_nb == cycles$cycle_nb_first_pos_preg_test),
  c("user_id", "preg_outcome")
]
users$first_preg_outcome = users_first_preg_outcome$preg_outcome[
  match(users$user_id, users_first_preg_outcome$user_id)
]
# Collapse the detailed outcome into "LB", "PL" or NA.
users$first_preg_outcome_simple = ifelse(
  users$first_preg_outcome %in% c("TB noBF", "BF", "PTB"), "LB", #"ExPTB",
  ifelse(users$first_preg_outcome %in% c("EPL", "LPL"), "PL", NA)
)

# Time from the user's earliest_date to the start date of that same cycle,
# in days and divided by 365.
users_date_first_preg_test = cycles[
  which(cycles$cycle_nb == cycles$cycle_nb_first_pos_preg_test),
  c("user_id", "start_date")
]
users$time_to_first_pos_test_in_days = as.numeric(
  users_date_first_preg_test$start_date[
    match(users$user_id, users_date_first_preg_test$user_id)
  ] - users$earliest_date
)
users$time_to_first_pos_test = users$time_to_first_pos_test_in_days / 365

# Overwrite users.feather with the enriched table and keep a copy under
# IO$tmp_data.
write_feather(users, path = paste0(IO$output_data, "users.feather"))
file.copy(
  from = paste0(IO$output_data, "users.feather"),
  to = paste0(IO$tmp_data, "users_with_stats_4_cycles.feather"),
  overwrite = TRUE
)
## [1] TRUE
# any_PL is TRUE when n_PL > 0, FALSE when n_PL is not > 0 but n_LB > 0,
# and NA otherwise; `u` keeps only the users with a non-missing flag.
users$any_PL = ifelse(
  users$n_PL > 0,
  TRUE,
  ifelse(users$n_LB > 0, FALSE, NA)
)
u = users[!is.na(users$any_PL), ]

# Binomial GLM of any_PL on the three cycle_length_before_preg_* columns
# and age_now.
glm_cl = glm(
  formula = any_PL ~ cycle_length_before_preg_avg +
    cycle_length_before_preg_median +
    cycle_length_before_preg_sd +
    age_now,
  family = "binomial",
  data = u
)
summary(glm_cl)
##
## Call:
## glm(formula = any_PL ~ cycle_length_before_preg_avg + cycle_length_before_preg_median +
## cycle_length_before_preg_sd + age_now, family = "binomial",
## data = u)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8483 -1.0944 -0.9351 1.2301 1.7434
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.336e+00 1.262e-01 -18.515 <2e-16 ***
## cycle_length_before_preg_avg 9.169e-07 5.285e-04 0.002 0.9986
## cycle_length_before_preg_median 2.693e-03 8.479e-04 3.175 0.0015 **
## cycle_length_before_preg_sd 3.255e-05 9.690e-05 0.336 0.7369
## age_now 6.076e-02 3.580e-03 16.973 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 20632 on 14977 degrees of freedom
## Residual deviance: 20320 on 14973 degrees of freedom
## (31333 observations deleted due to missingness)
## AIC: 20330
##
## Number of Fisher Scoring iterations: 5
# Binomial GLM of any_PL on cycle_length_before_preg_median alone.
glm_cl = glm(
  formula = any_PL ~ cycle_length_before_preg_median,
  family = "binomial",
  data = u
)
summary(glm_cl)
##
## Call:
## glm(formula = any_PL ~ cycle_length_before_preg_median, family = "binomial",
## data = u)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.983 -1.117 -1.116 1.240 1.246
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.1601892 0.0118145 -13.559 < 2e-16 ***
## cycle_length_before_preg_median 0.0005209 0.0001856 2.806 0.00501 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 56867 on 41169 degrees of freedom
## Residual deviance: 56857 on 41168 degrees of freedom
## (5141 observations deleted due to missingness)
## AIC: 56861
##
## Number of Fisher Scoring iterations: 3
# Binomial GLM of any_PL on cycle_length_before_preg_sd alone.
glm_cl = glm(
  formula = any_PL ~ cycle_length_before_preg_sd,
  family = "binomial",
  data = u
)
summary(glm_cl)
##
## Call:
## glm(formula = any_PL ~ cycle_length_before_preg_sd, family = "binomial",
## data = u)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.582 -1.101 -1.101 1.256 1.256
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.822e-01 1.047e-02 -17.410 <2e-16 ***
## cycle_length_before_preg_sd 9.842e-06 1.117e-05 0.881 0.378
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 50799 on 36862 degrees of freedom
## Residual deviance: 50799 on 36861 degrees of freedom
## (9448 observations deleted due to missingness)
## AIC: 50803
##
## Number of Fisher Scoring iterations: 3
# Overlaid histograms (1-unit bins) of cycle_length_before_preg_avg, split
# by any_PL.
ggplot(data = u, mapping = aes(x = cycle_length_before_preg_avg, fill = any_PL)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5, colour = NA)
## Warning: Removed 5141 rows containing non-finite values (stat_bin).

# Same for cycle_length_before_preg_median.
ggplot(data = u, mapping = aes(x = cycle_length_before_preg_median, fill = any_PL)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5, colour = NA)
## Warning: Removed 5141 rows containing non-finite values (stat_bin).

# Kernel density (bandwidth 2) of cycle_length_before_preg_median.
ggplot(data = u, mapping = aes(x = cycle_length_before_preg_median, fill = any_PL)) +
  geom_density(bw = 2, alpha = 0.5, colour = NA)
## Warning: Removed 5141 rows containing non-finite values (stat_density).

# Histogram of the natural log of cycle_length_before_preg_sd.
ggplot(data = u, mapping = aes(x = log(cycle_length_before_preg_sd), fill = any_PL)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5, colour = NA)
## Warning: Removed 9805 rows containing non-finite values (stat_bin).
# Keep users whose simplified first-pregnancy outcome is known, and flag
# those for which it is "PL".
u = users[!is.na(users$first_preg_outcome_simple), ]
u$is_first_preg_a_PL = u$first_preg_outcome_simple == "PL"

# Binomial GLM of that flag on the mean, median and sd of cycle length
# computed over the flagged cycles before the first positive test.
glm_cl = glm(
  formula = is_first_preg_a_PL ~ avg_cycle_length_4 +
    median_cycle_length_4 +
    sd_cycle_length_4,
  family = "binomial",
  data = u
)
summary(glm_cl)
##
## Call:
## glm(formula = is_first_preg_a_PL ~ avg_cycle_length_4 + median_cycle_length_4 +
## sd_cycle_length_4, family = "binomial", data = u)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.7805 -1.0143 -0.9978 1.3456 1.6253
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.075956 0.080433 -0.944 0.344998
## avg_cycle_length_4 -0.029788 0.007109 -4.190 2.78e-05 ***
## median_cycle_length_4 0.017206 0.004775 3.603 0.000314 ***
## sd_cycle_length_4 0.016870 0.003564 4.733 2.21e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 34933 on 25887 degrees of freedom
## Residual deviance: 34876 on 25884 degrees of freedom
## (15831 observations deleted due to missingness)
## AIC: 34884
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of is_first_preg_a_PL on median_cycle_length_4 alone.
glm_cl = glm(
  formula = is_first_preg_a_PL ~ median_cycle_length_4,
  family = "binomial",
  data = u
)
summary(glm_cl)
##
## Call:
## glm(formula = is_first_preg_a_PL ~ median_cycle_length_4, family = "binomial",
## data = u)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.097 -1.017 -1.016 1.346 1.354
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.420955 0.044052 -9.556 <2e-16 ***
## median_cycle_length_4 0.001071 0.001363 0.785 0.432
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 34933 on 25887 degrees of freedom
## Residual deviance: 34932 on 25886 degrees of freedom
## (15831 observations deleted due to missingness)
## AIC: 34936
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of is_first_preg_a_PL on sd_cycle_length_4 alone.
glm_cl = glm(
  formula = is_first_preg_a_PL ~ sd_cycle_length_4,
  family = "binomial",
  data = u
)
summary(glm_cl)
##
## Call:
## glm(formula = is_first_preg_a_PL ~ sd_cycle_length_4, family = "binomial",
## data = u)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.746 -1.012 -1.010 1.352 1.355
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.4093681 0.0131568 -31.11 < 2e-16 ***
## sd_cycle_length_4 0.0018622 0.0003063 6.08 1.2e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 34933 on 25887 degrees of freedom
## Residual deviance: 34895 on 25886 degrees of freedom
## (15831 observations deleted due to missingness)
## AIC: 34899
##
## Number of Fisher Scoring iterations: 4
# Binomial GLM of is_first_preg_a_PL on sd_cycle_length_4 and
# median_cycle_length_4 together.
glm_cl = glm(
  formula = is_first_preg_a_PL ~ sd_cycle_length_4 + median_cycle_length_4,
  family = "binomial",
  data = u
)
summary(glm_cl)
##
## Call:
## glm(formula = is_first_preg_a_PL ~ sd_cycle_length_4 + median_cycle_length_4,
## family = "binomial", data = u)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.801 -1.013 -1.010 1.351 1.434
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.3531764 0.0456549 -7.736 1.03e-14 ***
## sd_cycle_length_4 0.0020000 0.0003259 6.137 8.39e-10 ***
## median_cycle_length_4 -0.0018667 0.0014528 -1.285 0.199
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 34933 on 25887 degrees of freedom
## Residual deviance: 34893 on 25885 degrees of freedom
## (15831 observations deleted due to missingness)
## AIC: 34899
##
## Number of Fisher Scoring iterations: 4
# Overlaid histograms (1-unit bins, x limited to 0-100) of
# avg_cycle_length_4, split by is_first_preg_a_PL.
ggplot(data = u, mapping = aes(x = avg_cycle_length_4, fill = is_first_preg_a_PL)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5, colour = NA) +
  xlim(0, 100)
## Warning: Removed 16347 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).

# Same for median_cycle_length_4.
ggplot(data = u, mapping = aes(x = median_cycle_length_4, fill = is_first_preg_a_PL)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5, colour = NA) +
  xlim(0, 100)
## Warning: Removed 15906 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).

# Histogram of log10(sd_cycle_length_4) in bins of 0.1.
ggplot(data = u, mapping = aes(x = log10(sd_cycle_length_4), fill = is_first_preg_a_PL)) +
  geom_histogram(binwidth = 0.1, position = "identity", alpha = 0.5, colour = NA)
## Warning: Removed 15945 rows containing non-finite values (stat_bin).

# Histogram of sd_cycle_length_4 on the raw scale, x limited to 0-50.
ggplot(data = u, mapping = aes(x = sd_cycle_length_4, fill = is_first_preg_a_PL)) +
  geom_histogram(binwidth = 1, position = "identity", alpha = 0.5, colour = NA) +
  xlim(0, 50)
## Warning: Removed 16809 rows containing non-finite values (stat_bin).
## Warning: Removed 4 rows containing missing values (geom_bar).
Loading the users, cycles and pregnancies tables (the days tables are read batch by batch further down).
# Load the users, cycles and pregnancies tables from the output data folder.
users = read_feather(path = paste0(IO$output_data, "users.feather"))
cycles = read_feather(path = paste0(IO$output_data, "cycles.feather"))
pregnancies = read_feather(path = paste0(IO$output_data, "pregnancies.feather"))

# Each statement below sits on its own line; they were fused together at the
# chunk boundaries, which does not parse.

# Row indices of users with n_PL == 1, n_preg == 1, user_type == 1,
# 15 to 20 cycles and 300 to 400 observed days.
j_PL = which((users$n_PL == 1) &
               (users$n_preg == 1) &
               (users$user_type == 1) &
               (users$n_cycles %in% 15:20) &
               (users$n_days_obs %in% 300:400))

# Same criteria with n_LB == 1 instead of n_PL == 1.
j_LB = which((users$n_LB == 1) &
               (users$n_preg == 1) &
               (users$user_type == 1) &
               (users$n_cycles %in% 15:20) &
               (users$n_days_obs %in% 300:400))

# Union of both selections.
j = unique(c(j_LB, j_PL))
# Names of the (at most) 10 batches holding the most selected users, in
# decreasing order of user count.
batches = head(names(sort(table(users$batch[j]), decreasing = TRUE)), 10)
tic()
# Read each batch's days file sequentially, keep only the rows of the selected
# users and stack the results by row.
days = foreach(b = batches, .combine = rbind, .packages = "feather") %do% {
  cat(b, "\n")
  days = read_feather(path = paste0(IO$output_data, "Days/days_", b, ".feather"))
  k = which(days$user_id %in% users$user_id[j])
  days[k, ]
}
## 58
## 14
## 32
## 63
## 10
## 17
## 23
## 25
## 33
## 4
toc()
## 30.127 sec elapsed

# Save the days of the selected users under IO$tmp_data. The statements below
# were fused at the chunk boundaries, which does not parse, so each now sits
# on its own line.
write_feather(days, path = paste0(IO$tmp_data,"days_selected_users_examples_1.feather"))

# Plot the tracking history (cycles numbered 1 and above) of every selected user.
for(user in unique(days$user_id)){
  cat("\n",user, "\n")
  d = days[which((days$user_id == user) & (days$cycle_nb >= 1)),]
  plot.tracking.history(d = d, show_tests = TRUE, average_temp = FALSE)
}

# Hand-picked example users, listed by user_id.
selected_users_LB = c("5b8a94bc60f1aad5ae030be0dddfbaf7783d99f5",
                      "3232d37bc6999d32faffca55ee3c67d3832ca8e1",
                      "d4a285083dac365f5ef76992bb14a885f69da1a6",
                      "f5e9deb6f71eaedb7aeb91b4637aaf80d910ee75",
                      "8f26d4876c83fde806ad847334b9c8dfbc48f8f6",
                      "5fa514a52a3863f33fc37579fac513911016ead0",
                      "aab98dc98261e99772693ffa81cafb6839e0316e")
selected_users_PL = c("fbf10d778485d325bddffad0c24010e9fbcbaa32",
                      "761e792a2309f4873d07ec95f6f595eb18d64621",
                      "a1862179ab04585bd5d26a325152e2ac377a6ea2",
                      "afdc30d1a29d04fd47ecd67cacf1bd22148ffdf8",
                      "6b99ee04c897ffe7916527b020fd031f99518ca7"
)
others = c("83299899d84f008adad561c75cc7c3cf82106f8f")
user_ids = c(selected_users_LB, selected_users_PL, others)
# Check that every hand-picked user is present in the loaded days.
all(user_ids %in% unique(days$user_id))
## [1] TRUE
# Print each hand-picked user's id, then plot their tracking history
# restricted to cycles numbered 1 and above.
for (user in user_ids) {
  cat("\n", user, "\n")
  d = days[which((days$cycle_nb >= 1) & (days$user_id == user)), ]
  plot.tracking.history(d = d, show_tests = TRUE, average_temp = FALSE)
}
##
## 5b8a94bc60f1aad5ae030be0dddfbaf7783d99f5
##
## 3232d37bc6999d32faffca55ee3c67d3832ca8e1
##
## d4a285083dac365f5ef76992bb14a885f69da1a6
##
## f5e9deb6f71eaedb7aeb91b4637aaf80d910ee75
##
## 8f26d4876c83fde806ad847334b9c8dfbc48f8f6
##
## 5fa514a52a3863f33fc37579fac513911016ead0
##
## aab98dc98261e99772693ffa81cafb6839e0316e
##
## fbf10d778485d325bddffad0c24010e9fbcbaa32
##
## 761e792a2309f4873d07ec95f6f595eb18d64621
##
## a1862179ab04585bd5d26a325152e2ac377a6ea2
##
## afdc30d1a29d04fd47ecd67cacf1bd22148ffdf8
##
## 6b99ee04c897ffe7916527b020fd031f99518ca7
##
## 83299899d84f008adad561c75cc7c3cf82106f8f
# Narrow `days` down to four of the example users and save the result as an
# .Rdata file under IO$tmp_data.
user_ids = c(
  "5b8a94bc60f1aad5ae030be0dddfbaf7783d99f5",
  "3232d37bc6999d32faffca55ee3c67d3832ca8e1",
  "761e792a2309f4873d07ec95f6f595eb18d64621",
  "d4a285083dac365f5ef76992bb14a885f69da1a6"
)
days = days[days$user_id %in% user_ids, ]
save(days, file = paste0(IO$tmp_data, "days_selected_users.Rdata"))